From d9f92faf40f2970ed86943d120a41982c391af44 Mon Sep 17 00:00:00 2001 From: sjplimp <sjplimp@f3b2605a-c512-4ea7-a41b-209d697bcdaa> Date: Wed, 27 Aug 2014 16:17:57 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12365 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/KOKKOS/Install.sh | 10 +- src/KOKKOS/atom_kokkos.cpp | 108 +- src/KOKKOS/atom_kokkos.h | 24 +- src/KOKKOS/atom_vec_angle_kokkos.cpp | 1886 +++++++++++++++ src/KOKKOS/atom_vec_angle_kokkos.h | 153 ++ src/KOKKOS/atom_vec_atomic_kokkos.cpp | 142 +- src/KOKKOS/atom_vec_bond_kokkos.cpp | 1717 ++++++++++++++ src/KOKKOS/atom_vec_bond_kokkos.h | 141 ++ src/KOKKOS/atom_vec_charge_kokkos.cpp | 1517 ++++++++++++ src/KOKKOS/atom_vec_charge_kokkos.h | 126 + src/KOKKOS/atom_vec_full_kokkos.cpp | 2321 +++++++++++++++++++ src/KOKKOS/atom_vec_full_kokkos.h | 183 ++ src/KOKKOS/atom_vec_kokkos.h | 28 +- src/KOKKOS/atom_vec_molecular_kokkos.cpp | 2236 ++++++++++++++++++ src/KOKKOS/atom_vec_molecular_kokkos.h | 178 ++ src/KOKKOS/comm_kokkos.cpp | 164 +- src/KOKKOS/comm_kokkos.h | 12 + src/KOKKOS/fix_langevin_kokkos.cpp | 810 +++++++ src/KOKKOS/fix_langevin_kokkos.h | 266 +++ src/KOKKOS/fix_nve_kokkos.cpp | 2 +- src/KOKKOS/kokkos.cpp | 8 +- src/KOKKOS/kokkos_type.h | 157 +- src/KOKKOS/neigh_full_kokkos.h | 295 ++- src/KOKKOS/neigh_list_kokkos.cpp | 2 +- src/KOKKOS/neigh_list_kokkos.h | 2 +- src/KOKKOS/neighbor_kokkos.cpp | 61 +- src/KOKKOS/neighbor_kokkos.h | 138 +- src/KOKKOS/pair_coul_cut_kokkos.cpp | 266 +++ src/KOKKOS/pair_coul_cut_kokkos.h | 137 ++ src/KOKKOS/pair_kokkos.h | 462 ++-- src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp | 347 +++ src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h | 131 ++ src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp | 496 ++++ src/KOKKOS/pair_lj_cut_coul_long_kokkos.h | 147 ++ src/KOKKOS/pair_lj_cut_kokkos.cpp | 14 +- src/KOKKOS/pair_lj_cut_kokkos.h | 11 +- src/KOKKOS/pair_table_kokkos.cpp | 141 +- src/KOKKOS/pair_table_kokkos.h | 60 +- src/KOKKOS/verlet_kokkos.cpp | 132 +- 39 files 
changed, 14362 insertions(+), 669 deletions(-) create mode 100644 src/KOKKOS/atom_vec_angle_kokkos.cpp create mode 100644 src/KOKKOS/atom_vec_angle_kokkos.h create mode 100644 src/KOKKOS/atom_vec_bond_kokkos.cpp create mode 100644 src/KOKKOS/atom_vec_bond_kokkos.h create mode 100644 src/KOKKOS/atom_vec_charge_kokkos.cpp create mode 100644 src/KOKKOS/atom_vec_charge_kokkos.h create mode 100644 src/KOKKOS/atom_vec_full_kokkos.cpp create mode 100644 src/KOKKOS/atom_vec_full_kokkos.h create mode 100644 src/KOKKOS/atom_vec_molecular_kokkos.cpp create mode 100644 src/KOKKOS/atom_vec_molecular_kokkos.h create mode 100644 src/KOKKOS/fix_langevin_kokkos.cpp create mode 100644 src/KOKKOS/fix_langevin_kokkos.h create mode 100644 src/KOKKOS/pair_coul_cut_kokkos.cpp create mode 100644 src/KOKKOS/pair_coul_cut_kokkos.h create mode 100644 src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp create mode 100644 src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h create mode 100644 src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp create mode 100644 src/KOKKOS/pair_lj_cut_coul_long_kokkos.h diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 9378eccfc7..8f6e21bf8b 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -40,12 +40,12 @@ if (test $1 = 1) then if (test -e ../Makefile.package) then sed -i -e 's/[^ \t]*kokkos[^ \t]* //g' ../Makefile.package sed -i -e 's/[^ \t]*KOKKOS[^ \t]* //g' ../Makefile.package - sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/kokkos\/core\/src -I../../lib/kokkos/containers/src -DLMP_KOKKOS |' ../Makefile.package - sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/kokkos\/core\/src |' ../Makefile.package + sed -i -e 's|^PKG_INC =[ \t]*|&-DLMP_KOKKOS |' ../Makefile.package +# sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/kokkos\/core\/src |' ../Makefile.package sed -i -e 's|^PKG_LIB =[ \t]*|&-lkokkoscore |' ../Makefile.package - sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(kokkos_SYSINC) |' ../Makefile.package - sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(kokkos_SYSLIB) |' 
../Makefile.package - sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(kokkos_SYSPATH) |' ../Makefile.package + sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(KOKKOS_INC) |' ../Makefile.package + sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(KOKKOS_LINK) |' ../Makefile.package +# sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(kokkos_SYSPATH) |' ../Makefile.package fi if (test -e ../Makefile.package.settings) then diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index 14dbd7e10e..89c7eea007 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -37,31 +37,43 @@ AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp) AtomKokkos::~AtomKokkos() { - k_tag = DAT::tdual_tagint_1d(); - k_mask = DAT::tdual_int_1d(); - k_type = DAT::tdual_int_1d(); - k_image = DAT::tdual_imageint_1d(); - k_molecule = DAT::tdual_tagint_1d(); - - k_x = DAT::tdual_x_array(); - k_v = DAT::tdual_v_array(); - k_f = DAT::tdual_f_array(); - - k_mass = DAT::tdual_float_1d(); - - tag = NULL; - mask = NULL; - type = NULL; - image = NULL; - molecule = NULL; - mass = NULL; - - memory->sfree(x); - memory->sfree(v); - memory->sfree(f); - x = NULL; - v = NULL; - f = NULL; + memory->destroy_kokkos(k_tag, tag); + memory->destroy_kokkos(k_mask, mask); + memory->destroy_kokkos(k_type, type); + memory->destroy_kokkos(k_image, image); + memory->destroy_kokkos(k_molecule, molecule); + + memory->destroy_kokkos(k_x, x); + memory->destroy_kokkos(k_v, v); + memory->destroy_kokkos(k_f, f); + + memory->destroy_kokkos(k_mass, mass); + + memory->destroy_kokkos(k_q,q); + + memory->destroy_kokkos(k_nspecial, nspecial); + memory->destroy_kokkos(k_special, special); + memory->destroy_kokkos(k_num_bond, num_bond); + memory->destroy_kokkos(k_bond_type, bond_type); + memory->destroy_kokkos(k_bond_atom, bond_atom); + memory->destroy_kokkos(k_num_angle, num_angle); + memory->destroy_kokkos(k_angle_type, angle_type); + memory->destroy_kokkos(k_angle_atom1, angle_atom1); + memory->destroy_kokkos(k_angle_atom2, angle_atom2); + 
memory->destroy_kokkos(k_angle_atom3, angle_atom3); + memory->destroy_kokkos(k_num_dihedral, num_dihedral); + memory->destroy_kokkos(k_dihedral_type, dihedral_type); + memory->destroy_kokkos(k_dihedral_atom1, dihedral_atom1); + memory->destroy_kokkos(k_dihedral_atom2, dihedral_atom2); + memory->destroy_kokkos(k_dihedral_atom3, dihedral_atom3); + memory->destroy_kokkos(k_dihedral_atom4, dihedral_atom4); + memory->destroy_kokkos(k_num_improper, num_improper); + memory->destroy_kokkos(k_improper_type, improper_type); + memory->destroy_kokkos(k_improper_atom1, improper_atom1); + memory->destroy_kokkos(k_improper_atom2, improper_atom2); + memory->destroy_kokkos(k_improper_atom3, improper_atom3); + memory->destroy_kokkos(k_improper_atom4, improper_atom4); + } /* ---------------------------------------------------------------------- */ @@ -96,9 +108,6 @@ void AtomKokkos::sort() { int i,m,n,ix,iy,iz,ibin,empty; - sync(Host,ALL_MASK); - modified(Host,ALL_MASK); - // set next timestep for sorting to take place nextsort = (update->ntimestep/sortfreq)*sortfreq + sortfreq; @@ -122,6 +131,9 @@ void AtomKokkos::sort() if (nlocal == nmax) avec->grow(0); + sync(Host,ALL_MASK); + modified(Host,ALL_MASK); + // bin atoms in reverse order so linked list will be in forward order for (i = 0; i < nbins; i++) binhead[i] = -1; @@ -188,3 +200,43 @@ void AtomKokkos::sort() //MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); //if (flagall) error->all(FLERR,"Atom sort did not operate correctly"); } + +/* ---------------------------------------------------------------------- + reallocate memory to the pointer selected by the mask +------------------------------------------------------------------------- */ +void AtomKokkos::grow(unsigned int mask){ + + if (mask && SPECIAL_MASK){ + memory->destroy_kokkos(k_special, special); + sync(Device, mask); + modified(Device, mask); + memory->grow_kokkos(k_special,special,nmax,maxspecial,"atom:special"); + avec->grow_reset(); + sync(Host, mask); + } + 
+} + +void AtomKokkos::deallocate_topology() +{ + memory->destroy_kokkos(k_bond_type, bond_type); + memory->destroy_kokkos(k_bond_atom, bond_atom); + + memory->destroy_kokkos(k_angle_type, angle_type); + memory->destroy_kokkos(k_angle_atom1, angle_atom1); + memory->destroy_kokkos(k_angle_atom2, angle_atom2); + memory->destroy_kokkos(k_angle_atom3, angle_atom3); + + memory->destroy_kokkos(k_dihedral_type, dihedral_type); + memory->destroy_kokkos(k_dihedral_atom1, dihedral_atom1); + memory->destroy_kokkos(k_dihedral_atom2, dihedral_atom2); + memory->destroy_kokkos(k_dihedral_atom3, dihedral_atom3); + memory->destroy_kokkos(k_dihedral_atom4, dihedral_atom4); + + memory->destroy_kokkos(k_improper_type, improper_type); + memory->destroy_kokkos(k_improper_atom1, improper_atom1); + memory->destroy_kokkos(k_improper_atom2, improper_atom2); + memory->destroy_kokkos(k_improper_atom3, improper_atom3); + memory->destroy_kokkos(k_improper_atom4, improper_atom4); +} + diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index 6d75f7edd4..2abf288793 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -21,15 +21,33 @@ namespace LAMMPS_NS { class AtomKokkos : public Atom { public: - DAT::tdual_tagint_1d k_tag, k_molecule; - DAT::tdual_imageint_1d k_image; + DAT::tdual_tagint_1d k_tag; DAT::tdual_int_1d k_type, k_mask; + DAT::tdual_imageint_1d k_image; DAT::tdual_x_array k_x; DAT::tdual_v_array k_v; DAT::tdual_f_array k_f; + DAT::tdual_float_1d k_mass; + DAT::tdual_float_1d k_q; + DAT::tdual_tagint_1d k_molecule; + DAT::tdual_int_2d k_nspecial; + DAT::tdual_tagint_2d k_special; + DAT::tdual_int_1d k_num_bond; + DAT::tdual_int_2d k_bond_type; + DAT::tdual_tagint_2d k_bond_atom; + DAT::tdual_int_1d k_num_angle; + DAT::tdual_int_2d k_angle_type; + DAT::tdual_tagint_2d k_angle_atom1, k_angle_atom2, k_angle_atom3; + DAT::tdual_int_1d k_num_dihedral; + DAT::tdual_int_2d k_dihedral_type; + DAT::tdual_tagint_2d k_dihedral_atom1, k_dihedral_atom2, 
k_dihedral_atom3, k_dihedral_atom4; + DAT::tdual_int_1d k_num_improper; + DAT::tdual_int_2d k_improper_type; + DAT::tdual_tagint_2d k_improper_atom1, k_improper_atom2, k_improper_atom3, k_improper_atom4; + AtomKokkos(class LAMMPS *); ~AtomKokkos(); @@ -37,6 +55,8 @@ class AtomKokkos : public Atom { void sync(const ExecutionSpace space, unsigned int mask); void modified(const ExecutionSpace space, unsigned int mask); virtual void sort(); + virtual void grow(unsigned int mask); + virtual void deallocate_topology(); }; template<class ViewType, class IndexView> diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp new file mode 100644 index 0000000000..86ef5509a5 --- /dev/null +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -0,0 +1,1886 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include "stdlib.h" +#include "atom_vec_angle_kokkos.h" +#include "atom_kokkos.h" +#include "comm_kokkos.h" +#include "domain.h" +#include "modify.h" +#include "fix.h" +#include "atom_masks.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define DELTA 10000 + +/* ---------------------------------------------------------------------- */ + +AtomVecAngleKokkos::AtomVecAngleKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) +{ + molecular = 1; + bonds_allow = angles_allow = 1; + mass_type = 1; + + comm_x_only = comm_f_only = 1; + size_forward = 3; + size_reverse = 3; + size_border = 7; + size_velocity = 3; + size_data_atom = 6; + size_data_vel = 4; + xcol_data = 4; + + atom->molecule_flag = 1; + + k_count = DAT::tdual_int_1d("atom::k_count",1); + atomKK = (AtomKokkos *) atom; + commKK = (CommKokkos *) comm; +} + +/* ---------------------------------------------------------------------- + grow atom arrays + n = 0 grows arrays by DELTA + n > 0 allocates arrays to size n +------------------------------------------------------------------------- */ + +void AtomVecAngleKokkos::grow(int n) +{ + if (n == 0) nmax += DELTA; + else nmax = n; + atomKK->nmax = nmax; + if (nmax < 0 || nmax > MAXSMALLINT) + error->one(FLERR,"Per-processor system is too big"); + + sync(Device,ALL_MASK); + modified(Device,ALL_MASK); + + memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); + memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); + memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); + memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); + + memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); + memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); + memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + + memory->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); + 
memory->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial"); + memory->grow_kokkos(atomKK->k_special,atomKK->special,nmax,atomKK->maxspecial, + "atom:special"); + memory->grow_kokkos(atomKK->k_num_bond,atomKK->num_bond,nmax,"atom:num_bond"); + memory->grow_kokkos(atomKK->k_bond_type,atomKK->bond_type,nmax,atomKK->bond_per_atom, + "atom:bond_type"); + memory->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom, + "atom:bond_atom"); + + memory->grow_kokkos(atomKK->k_num_angle,atomKK->num_angle,nmax,"atom:num_angle"); + memory->grow_kokkos(atomKK->k_angle_type,atomKK->angle_type,nmax,atomKK->angle_per_atom, + "atom:angle_type"); + memory->grow_kokkos(atomKK->k_angle_atom1,atomKK->angle_atom1,nmax,atomKK->angle_per_atom, + "atom:angle_atom1"); + memory->grow_kokkos(atomKK->k_angle_atom2,atomKK->angle_atom2,nmax,atomKK->angle_per_atom, + "atom:angle_atom2"); + memory->grow_kokkos(atomKK->k_angle_atom3,atomKK->angle_atom3,nmax,atomKK->angle_per_atom, + "atom:angle_atom3"); + + grow_reset(); + sync(Host,ALL_MASK); + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); +} + +/* ---------------------------------------------------------------------- + reset local array ptrs +------------------------------------------------------------------------- */ + +void AtomVecAngleKokkos::grow_reset() +{ + tag = atomKK->tag; + d_tag = atomKK->k_tag.d_view; + h_tag = atomKK->k_tag.h_view; + + type = atomKK->type; + d_type = atomKK->k_type.d_view; + h_type = atomKK->k_type.h_view; + mask = atomKK->mask; + d_mask = atomKK->k_mask.d_view; + h_mask = atomKK->k_mask.h_view; + image = atomKK->image; + d_image = atomKK->k_image.d_view; + h_image = atomKK->k_image.h_view; + + x = atomKK->x; + d_x = atomKK->k_x.d_view; + h_x = atomKK->k_x.h_view; + v = atomKK->v; + d_v = atomKK->k_v.d_view; + h_v = atomKK->k_v.h_view; + f = atomKK->f; + d_f = 
atomKK->k_f.d_view; + h_f = atomKK->k_f.h_view; + + molecule = atomKK->molecule; + d_molecule = atomKK->k_molecule.d_view; + h_molecule = atomKK->k_molecule.h_view; + nspecial = atomKK->nspecial; + d_nspecial = atomKK->k_nspecial.d_view; + h_nspecial = atomKK->k_nspecial.h_view; + special = atomKK->special; + d_special = atomKK->k_special.d_view; + h_special = atomKK->k_special.h_view; + num_bond = atomKK->num_bond; + d_num_bond = atomKK->k_num_bond.d_view; + h_num_bond = atomKK->k_num_bond.h_view; + bond_type = atomKK->bond_type; + d_bond_type = atomKK->k_bond_type.d_view; + h_bond_type = atomKK->k_bond_type.h_view; + bond_atom = atomKK->bond_atom; + d_bond_atom = atomKK->k_bond_atom.d_view; + h_bond_atom = atomKK->k_bond_atom.h_view; + + num_angle = atomKK->num_angle; + d_num_angle = atomKK->k_num_angle.d_view; + h_num_angle = atomKK->k_num_angle.h_view; + angle_type = atomKK->angle_type; + d_angle_type = atomKK->k_angle_type.d_view; + h_angle_type = atomKK->k_angle_type.h_view; + angle_atom1 = atomKK->angle_atom1; + d_angle_atom1 = atomKK->k_angle_atom1.d_view; + h_angle_atom1 = atomKK->k_angle_atom1.h_view; + angle_atom2 = atomKK->angle_atom2; + d_angle_atom2 = atomKK->k_angle_atom2.d_view; + h_angle_atom2 = atomKK->k_angle_atom2.h_view; + angle_atom3 = atomKK->angle_atom3; + d_angle_atom3 = atomKK->k_angle_atom3.d_view; + h_angle_atom3 = atomKK->k_angle_atom3.h_view; +} + +/* ---------------------------------------------------------------------- + copy atom I info to atom J +------------------------------------------------------------------------- */ + +void AtomVecAngleKokkos::copy(int i, int j, int delflag) +{ + int k; + + h_tag[j] = h_tag[i]; + h_type[j] = h_type[i]; + mask[j] = mask[i]; + h_image[j] = h_image[i]; + h_x(j,0) = h_x(i,0); + h_x(j,1) = h_x(i,1); + h_x(j,2) = h_x(i,2); + h_v(j,0) = h_v(i,0); + h_v(j,1) = h_v(i,1); + h_v(j,2) = h_v(i,2); + + h_molecule(j) = h_molecule(i); + + h_num_bond(j) = h_num_bond(i); + for (k = 0; k < h_num_bond(j); k++) { 
+ h_bond_type(j,k) = h_bond_type(i,k); + h_bond_atom(j,k) = h_bond_atom(i,k); + } + + h_nspecial(j,0) = h_nspecial(i,0); + h_nspecial(j,1) = h_nspecial(i,1); + h_nspecial(j,2) = h_nspecial(i,2); + for (k = 0; k < h_nspecial(j,2); k++) + h_special(j,k) = h_special(i,k); + + h_num_angle(j) = h_num_angle(i); + for (k = 0; k < h_num_angle(j); k++) { + h_angle_type(j,k) = h_angle_type(i,k); + h_angle_atom1(j,k) = h_angle_atom1(i,k); + h_angle_atom2(j,k) = h_angle_atom2(i,k); + h_angle_atom3(j,k) = h_angle_atom3(i,k); + } + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag); +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG,int TRICLINIC> +struct AtomVecAngleKokkos_PackComm { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf; + typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecAngleKokkos_PackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_xfloat_2d &buf, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + const size_t maxsend = (buf.view<DeviceType>().dimension_0() + *buf.view<DeviceType>().dimension_1())/3; + const size_t elements = 3; + buffer_view<DeviceType>(_buf,buf,maxsend,elements); + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j 
= _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::pack_comm_kokkos(const int &n, + const DAT::tdual_int_2d &list, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, + const int* const pbc) +{ + // Check whether to always run forward communication on the host + // Choose correct forward PackComm kernel + + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecAngleKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecAngleKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecAngleKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecAngleKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPHostType::fence(); + } else { + sync(Device,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecAngleKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap, + 
domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecAngleKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecAngleKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecAngleKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPDeviceType::fence(); + } + + return n*size_forward; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG,int TRICLINIC> +struct AtomVecAngleKokkos_PackCommSelf { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + typename ArrayTypes<DeviceType>::t_x_array _xw; + int _nfirst; + typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecAngleKokkos_PackCommSelf( + const typename DAT::tdual_x_array &x, + const int &nfirst, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int 
j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _xw(i+_nfirst,0) = _x(j,0); + _xw(i+_nfirst,1) = _x(j,1); + _xw(i+_nfirst,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, + const int & iswap, + const int nfirst, const int &pbc_flag, + const int* const pbc) { + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + modified(Host,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,1,1> + f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,1,0> + f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,0,1> + f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,0,0> + f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPHostType::fence(); + } else { + sync(Device,X_MASK); + modified(Device,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,1,1> + 
f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,1,0> + f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,0,1> + f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,0,0> + f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPDeviceType::fence(); + } + return n*3; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecAngleKokkos_UnpackComm { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array _x; + typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf; + int _first; + + AtomVecAngleKokkos_UnpackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_xfloat_2d &buf, + const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()), + _first(first) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecAngleKokkos::unpack_comm_kokkos(const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf ) { + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + modified(Host,X_MASK); + struct AtomVecAngleKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); // kernel ran on the host space, so fence the host (matches the other host branches in this file) + } else { +
sync(Device,X_MASK); + modified(Device,X_MASK); + struct AtomVecAngleKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::pack_comm(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + } + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::pack_comm_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + 
pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + if (mask[j] & deform_groupbit) { // test the packed atom j; i only indexes the send list + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } + } + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecAngleKokkos::unpack_comm(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecAngleKokkos::unpack_comm_vel(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf) +{ + if(n > 0) + sync(Host,F_MASK); + + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + buf[m++] = h_f(i,0); + buf[m++] = h_f(i,1); + buf[m++] = h_f(i,2); + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecAngleKokkos::unpack_reverse(int n, int *list, double *buf) +{ + if(n > 0) + modified(Host,F_MASK); + + int m = 0; + for (int i = 0; i < n; i++) { + const int j = list[i]; + h_f(j,0) += buf[m++]; + h_f(j,1) += buf[m++]; + h_f(j,2) += buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG> +struct
AtomVecAngleKokkos_PackBorder { + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + + typename AT::t_xfloat_2d _buf; + const typename AT::t_int_2d_const _list; + const int _iswap; + const typename AT::t_x_array_randomread _x; + const typename AT::t_tagint_1d _tag; + const typename AT::t_int_1d _type; + const typename AT::t_int_1d _mask; + const typename AT::t_tagint_1d _molecule; + X_FLOAT _dx,_dy,_dz; + + AtomVecAngleKokkos_PackBorder( + const typename AT::t_xfloat_2d &buf, + const typename AT::t_int_2d_const &list, + const int & iswap, + const typename AT::t_x_array &x, + const typename AT::t_tagint_1d &tag, + const typename AT::t_int_1d &type, + const typename AT::t_int_1d &mask, + const typename AT::t_tagint_1d &molecule, + const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): + _buf(buf),_list(list),_iswap(iswap), + _x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule), + _dx(dx),_dy(dy),_dz(dz) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + _buf(i,3) = _tag(j); + _buf(i,4) = _type(j); + _buf(i,5) = _mask(j); + _buf(i,6) = _molecule(j); + } else { + _buf(i,0) = _x(j,0) + _dx; + _buf(i,1) = _x(j,1) + _dy; + _buf(i,2) = _x(j,2) + _dz; + _buf(i,3) = _tag(j); + _buf(i,4) = _type(j); + _buf(i,5) = _mask(j); + _buf(i,6) = _molecule(j); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, + DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space) +{ + X_FLOAT dx,dy,dz; + + if (pbc_flag != 0) { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if(space==Host) { + AtomVecAngleKokkos_PackBorder<LMPHostType,1> f( + 
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(), + iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); + } else { + AtomVecAngleKokkos_PackBorder<LMPDeviceType,1> f( + buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(), + iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } + + } else { + dx = dy = dz = 0; + if(space==Host) { + AtomVecAngleKokkos_PackBorder<LMPHostType,0> f( + buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(), + iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); + } else { + AtomVecAngleKokkos_PackBorder<LMPDeviceType,0> f( + buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(), + iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } + } + return n*size_border; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::pack_border(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = ubuf(h_molecule(j)).d; + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = ubuf(h_molecule(j)).d; + } + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += 
modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::pack_border_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = ubuf(h_molecule(j)).d; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = ubuf(h_molecule(j)).d; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = ubuf(h_molecule(j)).d; + if (mask[i] & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } + } + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + 
+ return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::pack_border_hybrid(int n, int *list, double *buf) +{ + int i,j,m; + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_molecule(j); + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecAngleKokkos_UnpackBorder { + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + + const typename AT::t_xfloat_2d_const _buf; + typename AT::t_x_array _x; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_tagint_1d _molecule; + int _first; + + + AtomVecAngleKokkos_UnpackBorder( + const typename AT::t_xfloat_2d_const &buf, + typename AT::t_x_array &x, + typename AT::t_tagint_1d &tag, + typename AT::t_int_1d &type, + typename AT::t_int_1d &mask, + typename AT::t_tagint_1d &molecule, + const int& first): + _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule), + _first(first){ + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _tag(i+_first) = static_cast<int> (_buf(i,3)); + _type(i+_first) = static_cast<int> (_buf(i,4)); + _mask(i+_first) = static_cast<int> (_buf(i,5)); + _molecule(i+_first) = static_cast<int> (_buf(i,6)); + + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecAngleKokkos::unpack_border_kokkos(const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf, + ExecutionSpace space) { + modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + while (first+n >= nmax) grow(0); + modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + if(space==Host) { + struct AtomVecAngleKokkos_UnpackBorder<LMPHostType> + 
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); + } else { + struct AtomVecAngleKokkos_UnpackBorder<LMPDeviceType> + f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_molecule,first); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecAngleKokkos::unpack_border(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + if (i == nmax) grow(0); + modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag(i) = (tagint) ubuf(buf[m++]).i; + h_type(i) = (int) ubuf(buf[m++]).i; + h_mask(i) = (int) ubuf(buf[m++]).i; + h_molecule(i) = (tagint) ubuf(buf[m++]).i; + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecAngleKokkos::unpack_border_vel(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + if (i == nmax) grow(0); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag(i) = (tagint) ubuf(buf[m++]).i; + h_type(i) = (int) ubuf(buf[m++]).i; + h_mask(i) = (int) ubuf(buf[m++]).i; + h_molecule(i) = (tagint) ubuf(buf[m++]).i; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::unpack_border_hybrid(int n, int first, double *buf) +{ + int i,m,last; + + 
m = 0; + last = first + n; + for (i = first; i < last; i++) + h_molecule(i) = (tagint) ubuf(buf[m++]).i; + return m; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecAngleKokkos_PackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + typename AT::t_x_array_randomread _x; + typename AT::t_v_array_randomread _v; + typename AT::t_tagint_1d_randomread _tag; + typename AT::t_int_1d_randomread _type; + typename AT::t_int_1d_randomread _mask; + typename AT::t_imageint_1d_randomread _image; + typename AT::t_tagint_1d_randomread _molecule; + typename AT::t_int_2d_randomread _nspecial; + typename AT::t_tagint_2d_randomread _special; + typename AT::t_int_1d_randomread _num_bond; + typename AT::t_int_2d_randomread _bond_type; + typename AT::t_tagint_2d_randomread _bond_atom; + typename AT::t_int_1d_randomread _num_angle; + typename AT::t_int_2d_randomread _angle_type; + typename AT::t_tagint_2d_randomread _angle_atom1,_angle_atom2,_angle_atom3; + typename AT::t_x_array _xw; + typename AT::t_v_array _vw; + typename AT::t_tagint_1d _tagw; + typename AT::t_int_1d _typew; + typename AT::t_int_1d _maskw; + typename AT::t_imageint_1d _imagew; + typename AT::t_tagint_1d _moleculew; + typename AT::t_int_2d _nspecialw; + typename AT::t_tagint_2d _specialw; + typename AT::t_int_1d _num_bondw; + typename AT::t_int_2d _bond_typew; + typename AT::t_tagint_2d _bond_atomw; + typename AT::t_int_1d _num_anglew; + typename AT::t_int_2d _angle_typew; + typename AT::t_tagint_2d _angle_atom1w,_angle_atom2w,_angle_atom3w; + + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d_const _sendlist; + typename AT::t_int_1d_const _copylist; + int _nlocal,_dim; + X_FLOAT _lo,_hi; + size_t elements; + + AtomVecAngleKokkos_PackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d sendlist, + typename AT::tdual_int_1d 
copylist,int nlocal, int dim, + X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view<DeviceType>()), + _v(atom->k_v.view<DeviceType>()), + _tag(atom->k_tag.view<DeviceType>()), + _type(atom->k_type.view<DeviceType>()), + _mask(atom->k_mask.view<DeviceType>()), + _image(atom->k_image.view<DeviceType>()), + _molecule(atom->k_molecule.view<DeviceType>()), + _nspecial(atom->k_nspecial.view<DeviceType>()), + _special(atom->k_special.view<DeviceType>()), + _num_bond(atom->k_num_bond.view<DeviceType>()), + _bond_type(atom->k_bond_type.view<DeviceType>()), + _bond_atom(atom->k_bond_atom.view<DeviceType>()), + _num_angle(atom->k_num_angle.view<DeviceType>()), + _angle_type(atom->k_angle_type.view<DeviceType>()), + _angle_atom1(atom->k_angle_atom1.view<DeviceType>()), + _angle_atom2(atom->k_angle_atom2.view<DeviceType>()), + _angle_atom3(atom->k_angle_atom3.view<DeviceType>()), + _xw(atom->k_x.view<DeviceType>()), + _vw(atom->k_v.view<DeviceType>()), + _tagw(atom->k_tag.view<DeviceType>()), + _typew(atom->k_type.view<DeviceType>()), + _maskw(atom->k_mask.view<DeviceType>()), + _imagew(atom->k_image.view<DeviceType>()), + _moleculew(atom->k_molecule.view<DeviceType>()), + _nspecialw(atom->k_nspecial.view<DeviceType>()), + _specialw(atom->k_special.view<DeviceType>()), + _num_bondw(atom->k_num_bond.view<DeviceType>()), + _bond_typew(atom->k_bond_type.view<DeviceType>()), + _bond_atomw(atom->k_bond_atom.view<DeviceType>()), + _num_anglew(atom->k_num_angle.view<DeviceType>()), + _angle_typew(atom->k_angle_type.view<DeviceType>()), + _angle_atom1w(atom->k_angle_atom1.view<DeviceType>()), + _angle_atom2w(atom->k_angle_atom2.view<DeviceType>()), + _angle_atom3w(atom->k_angle_atom3.view<DeviceType>()), + _sendlist(sendlist.template view<DeviceType>()), + _copylist(copylist.template view<DeviceType>()), + _nlocal(nlocal),_dim(dim), + _lo(lo),_hi(hi){ + // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, + // maxspecial special, 1 num_bond, bond_per_atom 
bond_type, bond_per_atom bond_atom, + // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, + // and angle_atom3 + // 1 to store buffer length + elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; + const int maxsendlist = (buf.template view<DeviceType>().dimension_0()* + buf.template view<DeviceType>().dimension_1())/elements; + buffer_view<DeviceType>(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &mysend) const { + int k; + const int i = _sendlist(mysend); + _buf(mysend,0) = elements; + int m = 1; + _buf(mysend,m++) = _x(i,0); + _buf(mysend,m++) = _x(i,1); + _buf(mysend,m++) = _x(i,2); + _buf(mysend,m++) = _v(i,0); + _buf(mysend,m++) = _v(i,1); + _buf(mysend,m++) = _v(i,2); + _buf(mysend,m++) = _tag(i); + _buf(mysend,m++) = _type(i); + _buf(mysend,m++) = _mask(i); + _buf(mysend,m++) = _image(i); + _buf(mysend,m++) = _molecule(i); + _buf(mysend,m++) = _num_bond(i); + for (k = 0; k < _num_bond(i); k++) { + _buf(mysend,m++) = _bond_type(i,k); + _buf(mysend,m++) = _bond_atom(i,k); + } + _buf(mysend,m++) = _num_angle(i); + for (k = 0; k < _num_angle(i); k++) { + _buf(mysend,m++) = _angle_type(i,k); + _buf(mysend,m++) = _angle_atom1(i,k); + _buf(mysend,m++) = _angle_atom2(i,k); + _buf(mysend,m++) = _angle_atom3(i,k); + } + _buf(mysend,m++) = _nspecial(i,0); + _buf(mysend,m++) = _nspecial(i,1); + _buf(mysend,m++) = _nspecial(i,2); + for (k = 0; k < _nspecial(i,2); k++) + _buf(mysend,m++) = _special(i,k); + + const int j = _copylist(mysend); + + if(j>-1) { + _xw(i,0) = _x(j,0); + _xw(i,1) = _x(j,1); + _xw(i,2) = _x(j,2); + _vw(i,0) = _v(j,0); + _vw(i,1) = _v(j,1); + _vw(i,2) = _v(j,2); + _tagw(i) = _tag(j); + _typew(i) = _type(j); + _maskw(i) = _mask(j); + _imagew(i) = _image(j); + _moleculew(i) = _molecule(j); + _num_bondw(i) = _num_bond(j); + for (k = 0; k < _num_bond(j); k++) { + _bond_typew(i,k) = _bond_type(j,k); + _bond_atomw(i,k) = _bond_atom(j,k); + } + 
_num_anglew(i) = _num_angle(j); + for (k = 0; k < _num_angle(j); k++) { + _angle_typew(i,k) = _angle_type(j,k); + _angle_atom1w(i,k) = _angle_atom1(j,k); + _angle_atom2w(i,k) = _angle_atom2(j,k); + _angle_atom3w(i,k) = _angle_atom3(j,k); + } + _nspecialw(i,0) = _nspecial(j,0); + _nspecialw(i,1) = _nspecial(j,1); + _nspecialw(i,2) = _nspecial(j,2); + for (k = 0; k < _nspecial(j,2); k++) + _specialw(i,k) = _special(j,k); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space,int dim,X_FLOAT lo, + X_FLOAT hi ) +{ + const int elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; + if(nsend > (int) (k_buf.view<LMPHostType>().dimension_0()* + k_buf.view<LMPHostType>().dimension_1())/elements) { + int newsize = nsend*elements/k_buf.view<LMPHostType>().dimension_1()+1; + k_buf.resize(newsize,k_buf.view<LMPHostType>().dimension_1()); + } + if(space == Host) { + AtomVecAngleKokkos_PackExchangeFunctor<LMPHostType> + f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + LMPHostType::fence(); + return nsend*elements; + } else { + AtomVecAngleKokkos_PackExchangeFunctor<LMPDeviceType> + f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + LMPDeviceType::fence(); + return nsend*elements; + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::pack_exchange(int i, double *buf) +{ + int k; + int m = 1; + buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + buf[m++] = ubuf(h_tag(i)).d; + buf[m++] = ubuf(h_type(i)).d; + buf[m++] = ubuf(h_mask(i)).d; + buf[m++] = ubuf(h_image(i)).d; + buf[m++] = ubuf(h_molecule(i)).d; + + 
buf[m++] = ubuf(h_num_bond(i)).d; + for (k = 0; k < h_num_bond(i); k++) { + buf[m++] = ubuf(h_bond_type(i,k)).d; + buf[m++] = ubuf(h_bond_atom(i,k)).d; + } + buf[m++] = ubuf(h_num_angle(i)).d; + for (k = 0; k < h_num_angle(i); k++) { + buf[m++] = ubuf(h_angle_type(i,k)).d; + buf[m++] = ubuf(h_angle_atom1(i,k)).d; + buf[m++] = ubuf(h_angle_atom2(i,k)).d; + buf[m++] = ubuf(h_angle_atom3(i,k)).d; + } + buf[m++] = ubuf(h_nspecial(i,0)).d; + buf[m++] = ubuf(h_nspecial(i,1)).d; + buf[m++] = ubuf(h_nspecial(i,2)).d; + for (k = 0; k < h_nspecial(i,2); k++) + buf[m++] = ubuf(h_special(i,k)).d; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecAngleKokkos_UnpackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + typename AT::t_x_array _x; + typename AT::t_v_array _v; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_imageint_1d _image; + typename AT::t_tagint_1d _molecule; + typename AT::t_int_2d _nspecial; + typename AT::t_tagint_2d _special; + typename AT::t_int_1d _num_bond; + typename AT::t_int_2d _bond_type; + typename AT::t_tagint_2d _bond_atom; + typename AT::t_int_1d _num_angle; + typename AT::t_int_2d _angle_type; + typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3; + + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d _nlocal; + int _dim; + X_FLOAT _lo,_hi; + size_t elements; + + AtomVecAngleKokkos_UnpackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + int dim, X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view<DeviceType>()), + _v(atom->k_v.view<DeviceType>()), + _tag(atom->k_tag.view<DeviceType>()), + 
_type(atom->k_type.view<DeviceType>()), + _mask(atom->k_mask.view<DeviceType>()), + _image(atom->k_image.view<DeviceType>()), + _molecule(atom->k_molecule.view<DeviceType>()), + _nspecial(atom->k_nspecial.view<DeviceType>()), + _special(atom->k_special.view<DeviceType>()), + _num_bond(atom->k_num_bond.view<DeviceType>()), + _bond_type(atom->k_bond_type.view<DeviceType>()), + _bond_atom(atom->k_bond_atom.view<DeviceType>()), + _num_angle(atom->k_num_angle.view<DeviceType>()), + _angle_type(atom->k_angle_type.view<DeviceType>()), + _angle_atom1(atom->k_angle_atom1.view<DeviceType>()), + _angle_atom2(atom->k_angle_atom2.view<DeviceType>()), + _angle_atom3(atom->k_angle_atom3.view<DeviceType>()), + _nlocal(nlocal.template view<DeviceType>()),_dim(dim), + _lo(lo),_hi(hi){ + elements =17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; + const int maxsendlist = (buf.template view<DeviceType>().dimension_0()* + buf.template view<DeviceType>().dimension_1())/elements; + buffer_view<DeviceType>(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &myrecv) const { + X_FLOAT x = _buf(myrecv,_dim+1); + if (x >= _lo && x < _hi) { + int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + int m = 1; + _x(i,0) = _buf(myrecv,m++); + _x(i,1) = _buf(myrecv,m++); + _x(i,2) = _buf(myrecv,m++); + _v(i,0) = _buf(myrecv,m++); + _v(i,1) = _buf(myrecv,m++); + _v(i,2) = _buf(myrecv,m++); + _tag(i) = _buf(myrecv,m++); + _type(i) = _buf(myrecv,m++); + _mask(i) = _buf(myrecv,m++); + _image(i) = _buf(myrecv,m++); + + _molecule(i) = _buf(myrecv,m++); + _num_bond(i) = _buf(myrecv,m++); + int k; + for (k = 0; k < _num_bond(i); k++) { + _bond_type(i,k) = _buf(myrecv,m++); + _bond_atom(i,k) = _buf(myrecv,m++); + } + _num_angle(i) = _buf(myrecv,m++); + for (k = 0; k < _num_angle(i); k++) { + _angle_type(i,k) = _buf(myrecv,m++); + _angle_atom1(i,k) = _buf(myrecv,m++); + _angle_atom2(i,k) = _buf(myrecv,m++); + _angle_atom3(i,k) = _buf(myrecv,m++); + } + 
_nspecial(i,0) = _buf(myrecv,m++); + _nspecial(i,1) = _buf(myrecv,m++); + _nspecial(i,2) = _buf(myrecv,m++); + for (k = 0; k < _nspecial(i,2); k++) + _special(i,k) = _buf(myrecv,m++); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, + int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, + ExecutionSpace space) { + const size_t elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; + if(space == Host) { + k_count.h_view(0) = nlocal; + AtomVecAngleKokkos_UnpackExchangeFunctor<LMPHostType> + f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/elements,f); + LMPHostType::fence(); + return k_count.h_view(0); + } else { + k_count.h_view(0) = nlocal; + k_count.modify<LMPHostType>(); + k_count.sync<LMPDeviceType>(); + AtomVecAngleKokkos_UnpackExchangeFunctor<LMPDeviceType> + f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/elements,f); + LMPDeviceType::fence(); + k_count.modify<LMPDeviceType>(); + k_count.sync<LMPHostType>(); + + return k_count.h_view(0); + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::unpack_exchange(double *buf) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | + ANGLE_MASK | SPECIAL_MASK); + + int k; + int m = 1; + h_x(nlocal,0) = buf[m++]; + h_x(nlocal,1) = buf[m++]; + h_x(nlocal,2) = buf[m++]; + h_v(nlocal,0) = buf[m++]; + h_v(nlocal,1) = buf[m++]; + h_v(nlocal,2) = buf[m++]; + h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; + h_type(nlocal) = (int) ubuf(buf[m++]).i; + h_mask(nlocal) = (int) ubuf(buf[m++]).i; + h_image(nlocal) = (imageint) ubuf(buf[m++]).i; + h_molecule(nlocal) = (tagint) ubuf(buf[m++]).i; + + h_num_bond(nlocal) = (int) ubuf(buf[m++]).i; + for (k = 0; k < h_num_bond(nlocal); k++) { + 
h_bond_type(nlocal,k) = (int) ubuf(buf[m++]).i; + h_bond_atom(nlocal,k) = (tagint) ubuf(buf[m++]).i; + } + h_num_angle(nlocal) = (int) ubuf(buf[m++]).i; + for (k = 0; k < h_num_angle(nlocal); k++) { + h_angle_type(nlocal,k) = (int) ubuf(buf[m++]).i; + h_angle_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_angle_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_angle_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i; + } + h_nspecial(nlocal,0) = (int) ubuf(buf[m++]).i; + h_nspecial(nlocal,1) = (int) ubuf(buf[m++]).i; + h_nspecial(nlocal,2) = (int) ubuf(buf[m++]).i; + for (k = 0; k < h_nspecial(nlocal,2); k++) + h_special(nlocal,k) = (tagint) ubuf(buf[m++]).i; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]-> + unpack_exchange(nlocal,&buf[m]); + + atom->nlocal++; + return m; +} + +/* ---------------------------------------------------------------------- + size of restart data for all atoms owned by this proc + include extra data stored by fixes +------------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::size_restart() +{ + int i; + + int nlocal = atom->nlocal; + int n = 0; + for (i = 0; i < nlocal; i++) + n += 14 + 2*h_num_bond(i) + 4*h_num_angle(i); + + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + for (i = 0; i < nlocal; i++) + n += modify->fix[atom->extra_restart[iextra]]->size_restart(i); + + return n; +} + +/* ---------------------------------------------------------------------- + pack atom I's data for restart file including extra quantities + xyz must be 1st 3 values, so that read_restart can test on them + molecular types may be negative, but write as positive +------------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::pack_restart(int i, double *buf) +{ + int m = 1; + buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] 
= ubuf(h_tag(i)).d; + buf[m++] = ubuf(h_type(i)).d; + buf[m++] = ubuf(h_mask(i)).d; + buf[m++] = ubuf(h_image(i)).d; + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + + buf[m++] = ubuf(h_molecule(i)).d; + + buf[m++] = ubuf(h_num_bond(i)).d; + for (int k = 0; k < h_num_bond(i); k++) { + buf[m++] = ubuf(MAX(h_bond_type(i,k),-h_bond_type(i,k))).d; + buf[m++] = ubuf(h_bond_atom(i,k)).d; + } + + buf[m++] = ubuf(h_num_angle(i)).d; + for (int k = 0; k < h_num_angle(i); k++) { + buf[m++] = ubuf(MAX(h_angle_type(i,k),-h_angle_type(i,k))).d; + buf[m++] = ubuf(h_angle_atom1(i,k)).d; + buf[m++] = ubuf(h_angle_atom2(i,k)).d; + buf[m++] = ubuf(h_angle_atom3(i,k)).d; + } + + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- + unpack data for one atom from restart file including extra quantities +------------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::unpack_restart(double *buf) +{ + int k; + + int nlocal = atom->nlocal; + if (nlocal == nmax) { + grow(0); + if (atom->nextra_store) + memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); + } + + int m = 1; + h_x(nlocal,0) = buf[m++]; + h_x(nlocal,1) = buf[m++]; + h_x(nlocal,2) = buf[m++]; + h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; + h_type(nlocal) = (int) ubuf(buf[m++]).i; + h_mask(nlocal) = (int) ubuf(buf[m++]).i; + h_image(nlocal) = (imageint) ubuf(buf[m++]).i; + h_v(nlocal,0) = buf[m++]; + h_v(nlocal,1) = buf[m++]; + h_v(nlocal,2) = buf[m++]; + + h_molecule(nlocal) = (tagint) ubuf(buf[m++]).i; + + h_num_bond(nlocal) = (int) ubuf(buf[m++]).i; + for (k = 0; k < h_num_bond(nlocal); k++) { + h_bond_type(nlocal,k) = (int) ubuf(buf[m++]).i; + h_bond_atom(nlocal,k) = (tagint) ubuf(buf[m++]).i; + } + + h_num_angle(nlocal) = (int) 
ubuf(buf[m++]).i; + for (k = 0; k < h_num_angle(nlocal); k++) { + h_angle_type(nlocal,k) = (int) ubuf(buf[m++]).i; + h_angle_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_angle_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_angle_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i; + } + + h_nspecial(nlocal,0) = h_nspecial(nlocal,1) = h_nspecial(nlocal,2) = 0; + + double **extra = atom->extra; + if (atom->nextra_store) { + int size = static_cast<int> (ubuf(buf[m++]).i) - m; + for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++]; + } + + atom->nlocal++; + return m; +} + +/* ---------------------------------------------------------------------- + create one atom of itype at coord + set other values to defaults +------------------------------------------------------------------------- */ + +void AtomVecAngleKokkos::create_atom(int itype, double *coord) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) { + atomKK->modified(Host,ALL_MASK); + grow(0); + } + atomKK->modified(Host,ALL_MASK); + + tag[nlocal] = 0; + type[nlocal] = itype; + h_x(nlocal,0) = coord[0]; + h_x(nlocal,1) = coord[1]; + h_x(nlocal,2) = coord[2]; + h_mask(nlocal) = 1; + h_image(nlocal) = ((imageint) IMGMAX << IMG2BITS) | + ((imageint) IMGMAX << IMGBITS) | IMGMAX; + h_v(nlocal,0) = 0.0; + h_v(nlocal,1) = 0.0; + h_v(nlocal,2) = 0.0; + + h_molecule(nlocal) = 0; + h_num_bond(nlocal) = 0; + h_num_angle(nlocal) = 0; + h_nspecial(nlocal,0) = h_nspecial(nlocal,1) = h_nspecial(nlocal,2) = 0; + + atom->nlocal++; +} + +/* ---------------------------------------------------------------------- + unpack one line from Atoms section of data file + initialize other atom quantities +------------------------------------------------------------------------- */ + +void AtomVecAngleKokkos::data_atom(double *coord, imageint imagetmp, + char **values) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + + h_tag(nlocal) = atoi(values[0]); + if (h_tag(nlocal) <= 0) + error->one(FLERR,"Invalid atom ID in 
Atoms section of data file"); + + h_molecule(nlocal) = atoi(values[1]); + if (h_molecule(nlocal) <= 0) + error->one(FLERR,"Invalid molecule ID in Atoms section of data file"); + + h_type(nlocal) = atoi(values[2]); + if (h_type(nlocal) <= 0 || h_type(nlocal) > atom->ntypes) + error->one(FLERR,"Invalid atom type in Atoms section of data file"); + + h_x(nlocal,0) = coord[0]; + h_x(nlocal,1) = coord[1]; + h_x(nlocal,2) = coord[2]; + + h_image(nlocal) = imagetmp; + + h_mask(nlocal) = 1; + h_v(nlocal,0) = 0.0; + h_v(nlocal,1) = 0.0; + h_v(nlocal,2) = 0.0; + h_num_bond(nlocal) = 0; + h_num_angle(nlocal) = 0; + + atom->nlocal++; +} + +/* ---------------------------------------------------------------------- + unpack hybrid quantities from one line in Atoms section of data file + initialize other atom quantities for this sub-style +------------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::data_atom_hybrid(int nlocal, char **values) +{ + h_molecule(nlocal) = atoi(values[0]); + h_num_bond(nlocal) = 0; + h_num_angle(nlocal) = 0; + return 1; +} + +/* ---------------------------------------------------------------------- + pack atom info for data file including 3 image flags +------------------------------------------------------------------------- */ + +void AtomVecAngleKokkos::pack_data(double **buf) +{ + int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) { + buf[i][0] = h_tag(i); + buf[i][1] = h_molecule(i); + buf[i][2] = h_type(i); + buf[i][3] = h_x(i,0); + buf[i][4] = h_x(i,1); + buf[i][5] = h_x(i,2); + buf[i][6] = (h_image[i] & IMGMASK) - IMGMAX; + buf[i][7] = (h_image[i] >> IMGBITS & IMGMASK) - IMGMAX; + buf[i][8] = (h_image[i] >> IMG2BITS) - IMGMAX; + } +} + +/* ---------------------------------------------------------------------- + pack hybrid atom info for data file +------------------------------------------------------------------------- */ + +int AtomVecAngleKokkos::pack_data_hybrid(int i, double *buf) +{ + 
buf[0] = h_molecule(i);
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   write atom info to data file including 3 image flags
+------------------------------------------------------------------------- */
+
+void AtomVecAngleKokkos::write_data(FILE *fp, int n, double **buf)
+{
+  for (int i = 0; i < n; i++)  // one output line per buf row; buf[i][0..8] are read
+    fprintf(fp,TAGINT_FORMAT " " TAGINT_FORMAT " %d %-1.16e %-1.16e %-1.16e %d %d %d\n",
+            (tagint) buf[i][0],(tagint) buf[i][1], (int) buf[i][2],  // NOTE(review): cols 0-1 assumed to be atom/molecule IDs, printed like write_data_hybrid() - confirm against pack_data()
+            buf[i][3],buf[i][4],buf[i][5],
+            (int) buf[i][6],(int) buf[i][7],(int) buf[i][8]);
+}
+
+/* ----------------------------------------------------------------------
+   write hybrid atom info to data file
+------------------------------------------------------------------------- */
+
+int AtomVecAngleKokkos::write_data_hybrid(FILE *fp, double *buf)
+{
+  fprintf(fp," " TAGINT_FORMAT, (tagint) (buf[0]));  // only buf[0] is written
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   return # of bytes of allocated memory
+------------------------------------------------------------------------- */
+
+bigint AtomVecAngleKokkos::memory_usage()
+{
+  bigint bytes = 0;
+
+  if (atom->memcheck("tag")) bytes += memory->usage(tag,nmax);
+  if (atom->memcheck("type")) bytes += memory->usage(type,nmax);
+  if (atom->memcheck("mask")) bytes += memory->usage(mask,nmax);
+  if (atom->memcheck("image")) bytes += memory->usage(image,nmax);
+  if (atom->memcheck("x")) bytes += memory->usage(x,nmax,3);
+  if (atom->memcheck("v")) bytes += memory->usage(v,nmax,3);
+  if (atom->memcheck("f")) bytes += memory->usage(f,nmax*commKK->nthreads,3);  // f is counted once per commKK->nthreads
+
+  if (atom->memcheck("molecule")) bytes += memory->usage(molecule,nmax);
+  if (atom->memcheck("nspecial")) bytes += memory->usage(nspecial,nmax,3);
+  if (atom->memcheck("special"))
+    bytes += memory->usage(special,nmax,atom->maxspecial);
+
+  if (atom->memcheck("num_bond")) bytes += memory->usage(num_bond,nmax);
+  if (atom->memcheck("bond_type"))
+    bytes += memory->usage(bond_type,nmax,atom->bond_per_atom);
+  if (atom->memcheck("bond_atom"))
+    bytes += memory->usage(bond_atom,nmax,atom->bond_per_atom);
+
+  if (atom->memcheck("num_angle")) bytes += memory->usage(num_angle,nmax);
+  if (atom->memcheck("angle_type"))
+    bytes += memory->usage(angle_type,nmax,atom->angle_per_atom);
+  if (atom->memcheck("angle_atom1"))
+    bytes += memory->usage(angle_atom1,nmax,atom->angle_per_atom);
+  if (atom->memcheck("angle_atom2"))
+    bytes += memory->usage(angle_atom2,nmax,atom->angle_per_atom);
+  if (atom->memcheck("angle_atom3"))
+    bytes += memory->usage(angle_atom3,nmax,atom->angle_per_atom);
+
+  return bytes;
+}
+
+/* sync to `space` the DualView of every field group whose bit is set in mask */
+
+void AtomVecAngleKokkos::sync(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.sync<LMPDeviceType>();  // bitwise test: a logical && would fire for any non-zero mask
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.sync<LMPDeviceType>();
+      atomKK->k_special.sync<LMPDeviceType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.sync<LMPDeviceType>();
+      atomKK->k_bond_type.sync<LMPDeviceType>();
+      atomKK->k_bond_atom.sync<LMPDeviceType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.sync<LMPDeviceType>();
+      atomKK->k_angle_type.sync<LMPDeviceType>();
+      atomKK->k_angle_atom1.sync<LMPDeviceType>();
+      atomKK->k_angle_atom2.sync<LMPDeviceType>();
+      atomKK->k_angle_atom3.sync<LMPDeviceType>();
+    }
+  } else {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.sync<LMPHostType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.sync<LMPHostType>();
+      atomKK->k_special.sync<LMPHostType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.sync<LMPHostType>();
+      atomKK->k_bond_type.sync<LMPHostType>();
+      atomKK->k_bond_atom.sync<LMPHostType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.sync<LMPHostType>();
+      atomKK->k_angle_type.sync<LMPHostType>();
+      atomKK->k_angle_atom1.sync<LMPHostType>();
+      atomKK->k_angle_atom2.sync<LMPHostType>();
+      atomKK->k_angle_atom3.sync<LMPHostType>();
+    }
+  }
+}
+
+/* mark as modified in `space` the DualView of every field group whose bit is set in mask */
+
+void AtomVecAngleKokkos::modified(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.modify<LMPDeviceType>();  // bitwise test: a logical && would fire for any non-zero mask
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.modify<LMPDeviceType>();
+      atomKK->k_special.modify<LMPDeviceType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.modify<LMPDeviceType>();
+      atomKK->k_bond_type.modify<LMPDeviceType>();
+      atomKK->k_bond_atom.modify<LMPDeviceType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.modify<LMPDeviceType>();
+      atomKK->k_angle_type.modify<LMPDeviceType>();
+      atomKK->k_angle_atom1.modify<LMPDeviceType>();
+      atomKK->k_angle_atom2.modify<LMPDeviceType>();
+      atomKK->k_angle_atom3.modify<LMPDeviceType>();
+    }
+  } else {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.modify<LMPHostType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.modify<LMPHostType>();
+      atomKK->k_special.modify<LMPHostType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.modify<LMPHostType>();
+      atomKK->k_bond_type.modify<LMPHostType>();
+      atomKK->k_bond_atom.modify<LMPHostType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.modify<LMPHostType>();
+      atomKK->k_angle_type.modify<LMPHostType>();
+      atomKK->k_angle_atom1.modify<LMPHostType>();
+      atomKK->k_angle_atom2.modify<LMPHostType>();
+      atomKK->k_angle_atom3.modify<LMPHostType>();
+    }
+  }
+}
diff --git a/src/KOKKOS/atom_vec_angle_kokkos.h b/src/KOKKOS/atom_vec_angle_kokkos.h
new file mode 100644
index 0000000000..b542761073
--- /dev/null
+++ b/src/KOKKOS/atom_vec_angle_kokkos.h
@@ -0,0 +1,153 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef ATOM_CLASS
+
+AtomStyle(angle/kk,AtomVecAngleKokkos)
+
+#else
+
+#ifndef LMP_ATOM_VEC_ANGLE_KOKKOS_H
+#define LMP_ATOM_VEC_ANGLE_KOKKOS_H
+
+#include "atom_vec_kokkos.h"
+
+namespace LAMMPS_NS {
+
+class AtomVecAngleKokkos : public AtomVecKokkos {  // atom style registered above as "angle/kk"
+ public:
+  AtomVecAngleKokkos(class LAMMPS *);
+  virtual ~AtomVecAngleKokkos() {}
+  void grow(int);
+  void copy(int, int, int);
+  int pack_comm(int, int *, double *, int, int *);  // double* buffer pack/unpack interface starts here
+  int pack_comm_vel(int, int *, double *, int, int *);
+  void unpack_comm(int, int, double *);
+  void unpack_comm_vel(int, int, double *);
+  int pack_reverse(int, int, double *);
+  void unpack_reverse(int, int *, double *);
+  int pack_border(int, int *, double *, int, int *);
+  int pack_border_vel(int, int *, double *, int, int *);
+  int pack_border_hybrid(int, int *, double *);
+  void unpack_border(int, int, double *);
+  void unpack_border_vel(int, int, double *);
+  int unpack_border_hybrid(int, int, double *);
+  int pack_exchange(int, double *);
+  int unpack_exchange(double *);
+  int size_restart();
+  int pack_restart(int, double *);
+  int unpack_restart(double *);
+  void create_atom(int, double *);
+  void data_atom(double *, tagint, char **);
+  int data_atom_hybrid(int, char **);
+  void pack_data(double **);
+  int pack_data_hybrid(int, double *);
+  void write_data(FILE *, int, double **);  // prints one line per buffer row to the FILE
+  int write_data_hybrid(FILE *, double *);  // prints buf[0] and returns 1
+  bigint memory_usage();  // sums memory->usage() over the per-atom arrays that pass atom->memcheck()
+
+  void grow_reset();
+  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,  // DualView-based (Kokkos) variants start here
+                       const int & iswap,
+                       const DAT::tdual_xfloat_2d &buf,
+                       const int &pbc_flag, const int pbc[]);
+  void unpack_comm_kokkos(const int &n, const int &nfirst,
+                          const DAT::tdual_xfloat_2d &buf);
+  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
+                     const int & iswap, const int nfirst,
+                     const int &pbc_flag, const int pbc[]);
+  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,int iswap,
+                         int pbc_flag, int *pbc, ExecutionSpace space);
+  void unpack_border_kokkos(const int &n, const int &nfirst,
+                            const DAT::tdual_xfloat_2d &buf,
+                            ExecutionSpace space);
+  int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
+                           DAT::tdual_int_1d k_sendlist,
+                           DAT::tdual_int_1d k_copylist,
+                           ExecutionSpace space, int dim,
+                           X_FLOAT lo, X_FLOAT hi);
+  int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
+                             int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
+                             ExecutionSpace space);
+
+  void sync(ExecutionSpace space, unsigned int mask);  // calls sync<>() on the atomKK DualViews selected by mask
+  void modified(ExecutionSpace space, unsigned int mask);  // calls modify<>() on the atomKK DualViews selected by mask
+
+ protected:
+
+  tagint *tag;  // raw per-atom arrays (the ones memory_usage() reports)
+  int *type,*mask;
+  imageint *image;
+  double **x,**v,**f;
+
+  tagint *molecule;
+  int **nspecial;
+  tagint **special;
+  int *num_bond;
+  int **bond_type;
+  tagint **bond_atom;
+
+  int *num_angle;
+  int **angle_type;
+  tagint **angle_atom1,**angle_atom2,**angle_atom3;
+
+  DAT::t_tagint_1d d_tag;  // d_* / h_* views: presumably device-side and host-side views of the same fields - confirm in grow_reset()
+  DAT::t_int_1d d_type, d_mask;
+  HAT::t_tagint_1d h_tag;
+  HAT::t_int_1d h_type, h_mask;
+
+  DAT::t_imageint_1d d_image;
+  HAT::t_imageint_1d h_image;
+
+  DAT::t_x_array d_x;
+  DAT::t_v_array d_v;
+  DAT::t_f_array d_f;
+  HAT::t_x_array h_x;
+  HAT::t_v_array h_v;
+  HAT::t_f_array h_f;
+
+  DAT::t_tagint_1d d_molecule;
+  DAT::t_int_2d d_nspecial;
+  DAT::t_tagint_2d d_special;
+  DAT::t_int_1d d_num_bond;
+  DAT::t_int_2d d_bond_type;
+  DAT::t_tagint_2d d_bond_atom;
+
+  HAT::t_tagint_1d h_molecule;
+  HAT::t_int_2d h_nspecial;
+  HAT::t_tagint_2d h_special;
+  HAT::t_int_1d h_num_bond;
+  HAT::t_int_2d h_bond_type;
+  HAT::t_tagint_2d h_bond_atom;
+
+  DAT::t_int_1d d_num_angle;
+  DAT::t_int_2d d_angle_type;
+  DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3;
+
+  HAT::t_int_1d h_num_angle;
+  HAT::t_int_2d h_angle_type;
+  HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3;
+
+  DAT::tdual_int_1d k_count;
+
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 2125519129..9b06a49149 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -553,14 +553,15 @@ void AtomVecAtomicKokkos::unpack_comm_vel(int n, int first, double *buf) int AtomVecAtomicKokkos::pack_reverse(int n, int first, double *buf) { - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - buf[m++] = f[i][0]; - buf[m++] = f[i][1]; - buf[m++] = f[i][2]; + if(n > 0) + sync(Host,F_MASK); + + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + buf[m++] = h_f(i,0); + buf[m++] = h_f(i,1); + buf[m++] = h_f(i,2); } return m; } @@ -569,14 +570,17 @@ int AtomVecAtomicKokkos::pack_reverse(int n, int first, double *buf) void AtomVecAtomicKokkos::unpack_reverse(int n, int *list, double *buf) { - int i,j,m; + if(n > 0) { + sync(Host,F_MASK); + modified(Host,F_MASK); + } - m = 0; - for (i = 0; i < n; i++) { - j = list[i]; - f[j][0] += buf[m++]; - f[j][1] += buf[m++]; - f[j][2] += buf[m++]; + int m = 0; + for (int i = 0; i < n; i++) { + const int j = list[i]; + h_f(j,0) += buf[m++]; + h_f(j,1) += buf[m++]; + h_f(j,2) += buf[m++]; } } @@ -588,11 +592,11 @@ struct AtomVecAtomicKokkos_PackBorder { typename ArrayTypes<DeviceType>::t_xfloat_2d _buf; const typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; const typename ArrayTypes<DeviceType>::t_x_array_randomread _x; const typename ArrayTypes<DeviceType>::t_tagint_1d _tag; const typename ArrayTypes<DeviceType>::t_int_1d _type; const typename ArrayTypes<DeviceType>::t_int_1d _mask; - const int _iswap; X_FLOAT _dx,_dy,_dz; AtomVecAtomicKokkos_PackBorder( @@ -694,9 +698,9 @@ int AtomVecAtomicKokkos::pack_border(int n, int *list, double *buf, buf[m++] = h_x(j,0); buf[m++] = h_x(j,1); buf[m++] = h_x(j,2); - buf[m++] = h_tag[j]; - buf[m++] = h_type[j]; - buf[m++] = h_mask[j]; + 
buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; } } else { if (domain->triclinic == 0) { @@ -713,11 +717,16 @@ int AtomVecAtomicKokkos::pack_border(int n, int *list, double *buf, buf[m++] = h_x(j,0) + dx; buf[m++] = h_x(j,1) + dy; buf[m++] = h_x(j,2) + dz; - buf[m++] = h_tag[j]; - buf[m++] = h_type[j]; - buf[m++] = h_mask[j]; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; } } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + return m; } @@ -736,9 +745,9 @@ int AtomVecAtomicKokkos::pack_border_vel(int n, int *list, double *buf, buf[m++] = h_x(j,0); buf[m++] = h_x(j,1); buf[m++] = h_x(j,2); - buf[m++] = h_tag[j]; - buf[m++] = h_type[j]; - buf[m++] = h_mask[j]; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; buf[m++] = h_v(j,0); buf[m++] = h_v(j,1); buf[m++] = h_v(j,2); @@ -759,9 +768,9 @@ int AtomVecAtomicKokkos::pack_border_vel(int n, int *list, double *buf, buf[m++] = h_x(j,0) + dx; buf[m++] = h_x(j,1) + dy; buf[m++] = h_x(j,2) + dz; - buf[m++] = h_tag[j]; - buf[m++] = h_type[j]; - buf[m++] = h_mask[j]; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; buf[m++] = h_v(j,0); buf[m++] = h_v(j,1); buf[m++] = h_v(j,2); @@ -775,9 +784,9 @@ int AtomVecAtomicKokkos::pack_border_vel(int n, int *list, double *buf, buf[m++] = h_x(j,0) + dx; buf[m++] = h_x(j,1) + dy; buf[m++] = h_x(j,2) + dz; - buf[m++] = h_tag[j]; - buf[m++] = h_type[j]; - buf[m++] = h_mask[j]; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; if (mask[i] & deform_groupbit) { buf[m++] = h_v(j,0) + dvx; buf[m++] = h_v(j,1) + dvy; @@ -790,6 +799,11 @@ int AtomVecAtomicKokkos::pack_border_vel(int n, int *list, double *buf, } } } + + if (atom->nextra_border) + for (int 
iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + return m; } @@ -861,10 +875,15 @@ void AtomVecAtomicKokkos::unpack_border(int n, int first, double *buf) h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; - h_tag[i] = static_cast<int> (buf[m++]); - h_type[i] = static_cast<int> (buf[m++]); - h_mask[i] = static_cast<int> (buf[m++]); + h_tag(i) = (tagint) ubuf(buf[m++]).i; + h_type(i) = (int) ubuf(buf[m++]).i; + h_mask(i) = (int) ubuf(buf[m++]).i; } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); } /* ---------------------------------------------------------------------- */ @@ -880,13 +899,18 @@ void AtomVecAtomicKokkos::unpack_border_vel(int n, int first, double *buf) h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; - h_tag[i] = static_cast<int> (buf[m++]); - h_type[i] = static_cast<int> (buf[m++]); - h_mask[i] = static_cast<int> (buf[m++]); + h_tag(i) = (tagint) ubuf(buf[m++]).i; + h_type(i) = (int) ubuf(buf[m++]).i; + h_mask(i) = (int) ubuf(buf[m++]).i; h_v(i,0) = buf[m++]; h_v(i,1) = buf[m++]; h_v(i,2) = buf[m++]; } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); } /* ---------------------------------------------------------------------- */ @@ -895,7 +919,6 @@ template<class DeviceType> struct AtomVecAtomicKokkos_PackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes<DeviceType> AT; - X_FLOAT _lo,_hi; typename AT::t_x_array_randomread _x; typename AT::t_v_array_randomread _v; typename AT::t_tagint_1d_randomread _tag; @@ -910,9 +933,10 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor { typename AT::t_imageint_1d _imagew; typename AT::t_xfloat_2d_um _buf; - int _nlocal,_dim; typename 
AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; + int _nlocal,_dim; + X_FLOAT _lo,_hi; AtomVecAtomicKokkos_PackExchangeFunctor( const AtomKokkos* atom, @@ -977,7 +1001,7 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor { int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi ) { - if(nsend > (k_buf.view<LMPHostType>().dimension_0()*k_buf.view<LMPHostType>().dimension_1())/11) { + if(nsend > (int) (k_buf.view<LMPHostType>().dimension_0()*k_buf.view<LMPHostType>().dimension_1())/11) { int newsize = nsend*11/k_buf.view<LMPHostType>().dimension_1()+1; k_buf.resize(newsize,k_buf.view<LMPHostType>().dimension_1()); } @@ -1005,10 +1029,10 @@ int AtomVecAtomicKokkos::pack_exchange(int i, double *buf) buf[m++] = h_v(i,0); buf[m++] = h_v(i,1); buf[m++] = h_v(i,2); - buf[m++] = h_tag[i]; - buf[m++] = h_type[i]; - buf[m++] = h_mask[i]; - *((tagint *) &buf[m++]) = h_image[i]; + buf[m++] = ubuf(h_tag(i)).d; + buf[m++] = ubuf(h_type(i)).d; + buf[m++] = ubuf(h_mask(i)).d; + buf[m++] = ubuf(h_image(i)).d; if (atom->nextra_grow) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) @@ -1024,7 +1048,6 @@ template<class DeviceType> struct AtomVecAtomicKokkos_UnpackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes<DeviceType> AT; - X_FLOAT _lo,_hi; typename AT::t_x_array _x; typename AT::t_v_array _v; typename AT::t_tagint_1d _tag; @@ -1033,8 +1056,9 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor { typename AT::t_imageint_1d _image; typename AT::t_xfloat_2d_um _buf; - int _dim; typename AT::t_int_1d _nlocal; + int _dim; + X_FLOAT _lo,_hi; AtomVecAtomicKokkos_UnpackExchangeFunctor( const AtomKokkos* atom, @@ -1113,10 +1137,10 @@ int AtomVecAtomicKokkos::unpack_exchange(double *buf) h_v(nlocal,0) = buf[m++]; h_v(nlocal,1) = buf[m++]; h_v(nlocal,2) = buf[m++]; - h_tag[nlocal] = static_cast<int> 
(buf[m++]); - h_type[nlocal] = static_cast<int> (buf[m++]); - h_mask[nlocal] = static_cast<int> (buf[m++]); - h_image[nlocal] = static_cast<int> (buf[m++]); + h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; + h_type(nlocal) = (int) ubuf(buf[m++]).i; + h_mask(nlocal) = (int) ubuf(buf[m++]).i; + h_image(nlocal) = (imageint) ubuf(buf[m++]).i; if (atom->nextra_grow) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) @@ -1159,10 +1183,10 @@ int AtomVecAtomicKokkos::pack_restart(int i, double *buf) buf[m++] = h_x(i,0); buf[m++] = h_x(i,1); buf[m++] = h_x(i,2); - buf[m++] = h_tag[i]; - buf[m++] = h_type[i]; - buf[m++] = h_mask[i]; - buf[m++] = h_image[i]; + buf[m++] = ubuf(h_tag(i)).d; + buf[m++] = ubuf(h_type(i)).d; + buf[m++] = ubuf(h_mask(i)).d; + buf[m++] = ubuf(h_image(i)).d; buf[m++] = h_v(i,0); buf[m++] = h_v(i,1); buf[m++] = h_v(i,2); @@ -1192,17 +1216,17 @@ int AtomVecAtomicKokkos::unpack_restart(double *buf) h_x(nlocal,0) = buf[m++]; h_x(nlocal,1) = buf[m++]; h_x(nlocal,2) = buf[m++]; - h_tag[nlocal] = static_cast<int> (buf[m++]); - h_type[nlocal] = static_cast<int> (buf[m++]); - h_mask[nlocal] = static_cast<int> (buf[m++]); - h_image[nlocal] = *((tagint *) &buf[m++]); + h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; + h_type(nlocal) = (int) ubuf(buf[m++]).i; + h_mask(nlocal) = (int) ubuf(buf[m++]).i; + h_image(nlocal) = (imageint) ubuf(buf[m++]).i; h_v(nlocal,0) = buf[m++]; h_v(nlocal,1) = buf[m++]; h_v(nlocal,2) = buf[m++]; double **extra = atom->extra; if (atom->nextra_store) { - int size = static_cast<int> (buf[0]) - m; + int size = static_cast<int> (ubuf(buf[m++]).i) - m; for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++]; } diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp new file mode 100644 index 0000000000..a1b2c7823c --- /dev/null +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -0,0 +1,1717 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular 
Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "stdlib.h"
+#include "atom_vec_bond_kokkos.h"
+#include "atom_kokkos.h"
+#include "comm_kokkos.h"
+#include "domain.h"
+#include "modify.h"
+#include "fix.h"
+#include "atom_masks.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define DELTA 10000
+
+/* constructor: set the atom-style flags and per-atom buffer sizes, cache atomKK/commKK */
+
+AtomVecBondKokkos::AtomVecBondKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
+{
+  molecular = 1;
+  bonds_allow = 1;
+  mass_type = 1;
+
+  comm_x_only = comm_f_only = 1;
+  size_forward = 3;
+  size_reverse = 3;
+  size_border = 7;
+  size_velocity = 3;
+  size_data_atom = 6;
+  size_data_vel = 4;
+  xcol_data = 4;
+
+  k_count = DAT::tdual_int_1d("atom::k_count",1);
+  atomKK = (AtomKokkos *) atom;  // same objects as atom/comm, viewed through their Kokkos subclasses
+  commKK = (CommKokkos *) comm;
+}
+
+/* ----------------------------------------------------------------------
+   grow atom arrays
+   n = 0 grows arrays by DELTA
+   n > 0 allocates arrays to size n
+------------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::grow(int n)
+{
+  if (n == 0) nmax += DELTA;
+  else nmax = n;
+  atomKK->nmax = nmax;
+  if (nmax < 0 || nmax > MAXSMALLINT)
+    error->one(FLERR,"Per-processor system is too big");
+
+  sync(Device,ALL_MASK);
+  modified(Device,ALL_MASK);
+
+  memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
+  memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
+  memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask");
+  memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image");
+
+  memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x");
+  memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v");
+  memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f");
+
+  memory->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule");
+  memory->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial");
+  memory->grow_kokkos(atomKK->k_special,atomKK->special,nmax,atomKK->maxspecial,"atom:special");
+  memory->grow_kokkos(atomKK->k_num_bond,atomKK->num_bond,nmax,"atom:num_bond");
+  memory->grow_kokkos(atomKK->k_bond_type,atomKK->bond_type,nmax,atomKK->bond_per_atom,"atom:bond_type");
+  memory->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom,"atom:bond_atom");
+
+  grow_reset();  // re-cache pointers and views after the arrays were regrown
+  sync(Host,ALL_MASK);
+
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atomKK->nextra_grow; iextra++)
+      modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
+}
+
+/* ----------------------------------------------------------------------
+   reset local array ptrs
+------------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::grow_reset()
+{
+  tag = atomKK->tag;
+  d_tag = atomKK->k_tag.d_view;
+  h_tag = atomKK->k_tag.h_view;
+
+  type = atomKK->type;
+  d_type = atomKK->k_type.d_view;
+  h_type = atomKK->k_type.h_view;
+  mask = atomKK->mask;
+  d_mask = atomKK->k_mask.d_view;
+  h_mask = atomKK->k_mask.h_view;
+  image = atomKK->image;
+  d_image = atomKK->k_image.d_view;
+  h_image = atomKK->k_image.h_view;
+
+  x = atomKK->x;
+  d_x = atomKK->k_x.d_view;
+  h_x = atomKK->k_x.h_view;
+  v = atomKK->v;
+  d_v = atomKK->k_v.d_view;
+  h_v = atomKK->k_v.h_view;
+  f = atomKK->f;
+  d_f = atomKK->k_f.d_view;
+  h_f = atomKK->k_f.h_view;
+
+  molecule = atomKK->molecule;
+  d_molecule = atomKK->k_molecule.d_view;
+  h_molecule = atomKK->k_molecule.h_view;
+  nspecial = atomKK->nspecial;
+  d_nspecial = atomKK->k_nspecial.d_view;
+  h_nspecial = atomKK->k_nspecial.h_view;
+  special = atomKK->special;
+  d_special = atomKK->k_special.d_view;
+  h_special = atomKK->k_special.h_view;
+  num_bond = atomKK->num_bond;
+  d_num_bond = atomKK->k_num_bond.d_view;
+  h_num_bond = atomKK->k_num_bond.h_view;
+  bond_type = atomKK->bond_type;
+  d_bond_type = atomKK->k_bond_type.d_view;
+  h_bond_type = atomKK->k_bond_type.h_view;
+  bond_atom = atomKK->bond_atom;
+  d_bond_atom = atomKK->k_bond_atom.d_view;
+  h_bond_atom = atomKK->k_bond_atom.h_view;
+}
+
+/* ----------------------------------------------------------------------
+   copy atom I info to atom J
+------------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::copy(int i, int j, int delflag)
+{
+  int k;
+
+  h_tag[j] = h_tag[i];
+  h_type[j] = h_type[i];
+  mask[j] = mask[i];  // raw pointer cached from atomKK->mask in grow_reset(), not the h_mask view
+  h_image[j] = h_image[i];
+  h_x(j,0) = h_x(i,0);
+  h_x(j,1) = h_x(i,1);
+  h_x(j,2) = h_x(i,2);
+  h_v(j,0) = h_v(i,0);
+  h_v(j,1) = h_v(i,1);
+  h_v(j,2) = h_v(i,2);
+
+  h_molecule(j) = h_molecule(i);
+
+  h_num_bond(j) = h_num_bond(i);
+  for (k = 0; k < h_num_bond(j); k++) {
+    h_bond_type(j,k) = h_bond_type(i,k);
+    h_bond_atom(j,k) = h_bond_atom(i,k);
+  }
+
+  h_nspecial(j,0) = h_nspecial(i,0);
+  h_nspecial(j,1) = h_nspecial(i,1);
+  h_nspecial(j,2) = h_nspecial(i,2);
+  for (k = 0; k < h_nspecial(j,2); k++) h_special(j,k) = h_special(i,k);  // only the first nspecial(j,2) entries are copied
+
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
+}
+
+/* functor: row i of _buf = x of atom _list(_iswap,i), shifted by the box terms when PBC_FLAG != 0 */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecBondKokkos_PackComm {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+
+  AtomVecBondKokkos_PackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+      _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
+      _xprd(xprd),_yprd(yprd),_zprd(zprd),
+      _xy(xy),_xz(xz),_yz(yz) {
+        const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3;  // rows available at 3 values per atom
+        const size_t elements = 3;
+        buffer_view<DeviceType>(_buf,buf,maxsend,elements);
+        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
+        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      const int j = _list(_iswap,i);
+      if (PBC_FLAG == 0) {
+          _buf(i,0) = _x(j,0);
+          _buf(i,1) = _x(j,1);
+          _buf(i,2) = _x(j,2);
+      } else {
+        if (TRICLINIC == 0) {
+          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
+          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
+          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+        } else {
+          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+        }
+      }
+  }
+};
+
+/* pack x for n atoms with a PackComm kernel on host or device; returns n*size_forward */
+
+int AtomVecBondKokkos::pack_comm_kokkos(const int &n,
+                                          const DAT::tdual_int_2d &list,
+                                          const int & iswap,
+                                          const DAT::tdual_xfloat_2d &buf,
+                                          const int &pbc_flag,
+                                          const int* const pbc)
+{
+  // Check whether to always run forward communication on the host
+  // Choose correct forward PackComm kernel
+
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK);
+    if(pbc_flag) {
+      if(domain->triclinic) {
+        struct AtomVecBondKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
+                                                               domain->xprd,domain->yprd,domain->zprd,
+                                                               domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecBondKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap,
+                                                               domain->xprd,domain->yprd,domain->zprd,
+                                                               domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+        struct AtomVecBondKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
+                                                               domain->xprd,domain->yprd,domain->zprd,
+                                                               domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecBondKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap,
+                                                               domain->xprd,domain->yprd,domain->zprd,
+                                                               domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+    LMPHostType::fence();
+  } else {
+    sync(Device,X_MASK);
+    if(pbc_flag) {
+      if(domain->triclinic) {
+        struct AtomVecBondKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
+                                                                 domain->xprd,domain->yprd,domain->zprd,
+                                                                 domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecBondKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap,
+                                                                 domain->xprd,domain->yprd,domain->zprd,
+                                                                 domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+        struct AtomVecBondKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
+                                                                 domain->xprd,domain->yprd,domain->zprd,
+                                                                 domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecBondKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap,
+                                                                 domain->xprd,domain->yprd,domain->zprd,
+                                                                 domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+    LMPDeviceType::fence();
+  }
+
+	return n*size_forward;
+}
+
+/* functor: x row _nfirst+i = x of atom _list(_iswap,i), shifted by the box terms when PBC_FLAG != 0 */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecBondKokkos_PackCommSelf {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_x_array _xw;
+  int _nfirst;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+
+  AtomVecBondKokkos_PackCommSelf(
+      const typename DAT::tdual_x_array &x,
+      const int &nfirst,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+      _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
+      _xprd(xprd),_yprd(yprd),_zprd(zprd),
+      _xy(xy),_xz(xz),_yz(yz) {
+        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
+        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      const int j = _list(_iswap,i);
+      if (PBC_FLAG == 0) {
+          _xw(i+_nfirst,0) = _x(j,0);
+          _xw(i+_nfirst,1) = _x(j,1);
+          _xw(i+_nfirst,2) = _x(j,2);
+      } else {
+        if (TRICLINIC == 0) {
+          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
+          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
+          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
+        } else {
+          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
+        }
+      }
+
+  }
+};
+
+/* copy x of n listed atoms into rows nfirst.. of x with a PackCommSelf kernel; returns n*3 */
+
+int AtomVecBondKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
+                                        const int nfirst, const int &pbc_flag, const int* const pbc) {
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);
+    if(pbc_flag) {
+      if(domain->triclinic) {
+      struct AtomVecBondKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
+                                                                 domain->xprd,domain->yprd,domain->zprd,
+                                                                 domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      } else {
+      struct AtomVecBondKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap,
+                                                                 domain->xprd,domain->yprd,domain->zprd,
+                                                                 domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+      struct AtomVecBondKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
+                                                                 domain->xprd,domain->yprd,domain->zprd,
+                                                                 domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      } else {
+      struct AtomVecBondKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap,
+                                                                 domain->xprd,domain->yprd,domain->zprd,
+                                                                 domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      }
+    }
+    LMPHostType::fence();
+  } else {
+    sync(Device,X_MASK);
+    modified(Device,X_MASK);
+    if(pbc_flag) {
+      if(domain->triclinic) {
+      struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
+                                                                   domain->xprd,domain->yprd,domain->zprd,
+                                                                   domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      } else {
+      struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap,
+                                                                   domain->xprd,domain->yprd,domain->zprd,
+                                                                   domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+      struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
+                                                                   domain->xprd,domain->yprd,domain->zprd,
+                                                                   domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      } else {
+      struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap,
+                                                                   domain->xprd,domain->yprd,domain->zprd,
+                                                                   domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      }
+    }
+    LMPDeviceType::fence();
+  }
+	return n*3;
+}
+
+/* functor: x row _first+i = row i of _buf */
+
+template<class DeviceType>
+struct AtomVecBondKokkos_UnpackComm {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array _x;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
+  int _first;
+
+  AtomVecBondKokkos_UnpackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
+                        _first(first) {};
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      _x(i+_first,0) = _buf(i,0);
+      _x(i+_first,1) = _buf(i,1);
+      _x(i+_first,2) = _buf(i,2);
+  }
+};
+
+/* unpack n rows of buf into x starting at row first, on host or device */
+
+void AtomVecBondKokkos::unpack_comm_kokkos(const int &n, const int &first,
+    const DAT::tdual_xfloat_2d &buf ) {
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);
+    struct AtomVecBondKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+    LMPHostType::fence();  // fence the space the kernel ran in, as pack_comm_kokkos() and pack_comm_self() do
+  } else {
+    sync(Device,X_MASK);
+    modified(Device,X_MASK);
+    struct AtomVecBondKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+    LMPDeviceType::fence();
+  }
+}
+
+/* pack x of the n listed atoms from the host view into buf; returns # of values written */
+
+int AtomVecBondKokkos::pack_comm(int n, int *list, double *buf,
+                             int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz;
+
+  m = 0;
+  if (pbc_flag == 0) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+      dz = pbc[2]*domain->zprd;
+    }
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0) + dx;
+      buf[m++] = h_x(j,1) + dy;
+      buf[m++] = h_x(j,2) + dz;
+    }
+  }
+  return m;
+}
+
+/* pack x and v of the n listed atoms from the host views into buf; returns # of values written */
+
+int AtomVecBondKokkos::pack_comm_vel(int n, int *list, double *buf,
+                                 int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;
+
+  m = 0;
+  if (pbc_flag == 0) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+      dz = pbc[2]*domain->zprd;
+    }
+    if (!deform_vremap) {
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        if (mask[i] & deform_groupbit) {  // NOTE(review): tests mask[i] although the packed atom is j = list[i] - confirm the intended index
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+  return m;
+}
+
+/* unpack x for atoms first..first+n-1 from buf into the host view */
+
+void AtomVecBondKokkos::unpack_comm(int n, int first, double *buf)
+{
+  int i,m,last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) {
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+  }
+}
+
+/* unpack x and v for atoms first..first+n-1 from buf into the host views */
+
+void AtomVecBondKokkos::unpack_comm_vel(int n, int first, double *buf)
+{
+  int i,m,last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) {
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_v(i,0) = buf[m++];
+    h_v(i,1) = buf[m++];
+    h_v(i,2) = buf[m++];
+  }
+}
+
+/* pack f of atoms first..first+n-1 from the host view into buf; returns # of values written */
+
+int AtomVecBondKokkos::pack_reverse(int n, int first, double *buf)
+{
+  if(n > 0)
+    sync(Host,F_MASK);
+
+  int m = 0;
+  const int last = first + n;
+  for (int i = first; i < last; i++) {
+    buf[m++] = h_f(i,0);
+    buf[m++] = h_f(i,1);
+    buf[m++] = h_f(i,2);
+  }
+  return m;
+}
+
+/* add the forces in buf to f of the n listed atoms in the host view */
+
+void AtomVecBondKokkos::unpack_reverse(int n, int *list, double *buf)
+{
+  if(n > 0) sync(Host,F_MASK);      // h_f is read by the += below, so bring the host copy up to date first
+  if(n > 0) modified(Host,F_MASK);  // same sync-then-modified order as AtomVecAtomicKokkos::unpack_reverse
+
+  int m = 0;
+  for (int i = 0; i < n; i++) {
+    const int j = list[i];
+    h_f(j,0) += buf[m++];
+    h_f(j,1) += buf[m++];
+    h_f(j,2) += buf[m++];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG>
+struct AtomVecBondKokkos_PackBorder {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  typename AT::t_xfloat_2d _buf;
+  const typename AT::t_int_2d_const _list;
+  const int _iswap;
+  const typename AT::t_x_array_randomread _x;
+  const typename AT::t_tagint_1d _tag;
+  const typename AT::t_int_1d _type;
+  const typename AT::t_int_1d _mask;
+  const typename AT::t_tagint_1d _molecule;
+  X_FLOAT _dx,_dy,_dz;
+
+  AtomVecBondKokkos_PackBorder(
+      const typename AT::t_xfloat_2d &buf,
+      const typename AT::t_int_2d_const &list,
+      const int & iswap,
+      const typename AT::t_x_array &x,
+      const typename AT::t_tagint_1d &tag,
+      const typename AT::t_int_1d &type,
+      const typename AT::t_int_1d &mask,
+      const typename AT::t_tagint_1d &molecule,
+      const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
+      _buf(buf),_list(list),_iswap(iswap),
+      _x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
+      _dx(dx),_dy(dy),_dz(dz) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      const int j = _list(_iswap,i);
+      if (PBC_FLAG == 0) {
+          _buf(i,0) = _x(j,0);
+ _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + _buf(i,3) = _tag(j); + _buf(i,4) = _type(j); + _buf(i,5) = _mask(j); + _buf(i,6) = _molecule(j); + } else { + _buf(i,0) = _x(j,0) + _dx; + _buf(i,1) = _x(j,1) + _dy; + _buf(i,2) = _x(j,2) + _dz; + _buf(i,3) = _tag(j); + _buf(i,4) = _type(j); + _buf(i,5) = _mask(j); + _buf(i,6) = _molecule(j); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, + DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space) +{ + X_FLOAT dx,dy,dz; + + if (pbc_flag != 0) { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if(space==Host) { + AtomVecBondKokkos_PackBorder<LMPHostType,1> f( + buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(), + iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); + } else { + AtomVecBondKokkos_PackBorder<LMPDeviceType,1> f( + buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(), + iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } + + } else { + dx = dy = dz = 0; + if(space==Host) { + AtomVecBondKokkos_PackBorder<LMPHostType,0> f( + buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(), + iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); + } else { + AtomVecBondKokkos_PackBorder<LMPDeviceType,0> f( + buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(), + iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } + } + return n*size_border; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecBondKokkos::pack_border(int n, int *list, double *buf, + int 
pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = ubuf(h_molecule(j)).d; + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = ubuf(h_molecule(j)).d; + } + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecBondKokkos::pack_border_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = ubuf(h_molecule(j)).d; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = 
ubuf(h_molecule(j)).d;
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = ubuf(h_tag(j)).d;
+        buf[m++] = ubuf(h_type(j)).d;
+        buf[m++] = ubuf(h_mask(j)).d;
+        buf[m++] = ubuf(h_molecule(j)).d;
+        if (mask[i] & deform_groupbit) {
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Pack the molecule ID of each of the n atoms in list into buf, one double
+// per atom, and return the number of doubles written.
+int AtomVecBondKokkos::pack_border_hybrid(int n, int *list, double *buf)
+{
+  int i,j,m;
+
+  m = 0;
+  for (i = 0; i < n; i++) {
+    j = list[i];
+    // store the integer bit pattern through ubuf: unpack_border_hybrid
+    // reads this slot back with ubuf(buf[m++]).i, so a plain value
+    // conversion to double here would be decoded as garbage
+    buf[m++] = ubuf(h_molecule(j)).d;
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Functor that unpacks border atoms from a 2d buffer: row i holds
+// x,y,z,tag,type,mask,molecule (columns 0..6) for atom i+_first.
+// The integer fields are recovered by value conversion from the buffer's
+// floating-point entries.
+template<class DeviceType>
+struct AtomVecBondKokkos_UnpackBorder {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  const typename AT::t_xfloat_2d_const _buf;
+  typename AT::t_x_array _x;
+  typename AT::t_tagint_1d _tag;
+  typename AT::t_int_1d _type;
+  typename AT::t_int_1d _mask;
+  typename AT::t_tagint_1d _molecule;
+  int _first;
+
+
+  AtomVecBondKokkos_UnpackBorder(
+      const typename AT::t_xfloat_2d_const &buf,
+      typename AT::t_x_array &x,
+      typename AT::t_tagint_1d &tag,
+      typename AT::t_int_1d &type,
+      typename AT::t_int_1d &mask,
+      typename AT::t_tagint_1d &molecule,
+      const int& first):
+    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
+    _first(first){
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      _x(i+_first,0) = _buf(i,0);
+      _x(i+_first,1) = _buf(i,1);
+      _x(i+_first,2) = _buf(i,2);
+      // _tag and _molecule are tagint views: convert to tagint rather than
+      // int so IDs are not narrowed when tagint is wider than int
+      _tag(i+_first) = static_cast<tagint> (_buf(i,3));
+      _type(i+_first) = static_cast<int> (_buf(i,4));
+      _mask(i+_first) = static_cast<int> (_buf(i,5));
+      _molecule(i+_first) = static_cast<tagint> (_buf(i,6));
+
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+// Unpack n border atoms from buf into atoms first..first+n-1 in the
+// requested execution space.  The per-atom arrays are grown until
+// first+n < nmax, and x/tag/type/mask/molecule are flagged as modified in
+// that space both before and after the grow.
+void AtomVecBondKokkos::unpack_border_kokkos(const int &n, const int &first,
+                     const DAT::tdual_xfloat_2d &buf,
+                     ExecutionSpace space) {
+  modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
+  while (first+n >= nmax) grow(0);
+  modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
+  if(space==Host) {
+    struct AtomVecBondKokkos_UnpackBorder<LMPHostType>
+      f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
+    Kokkos::parallel_for(n,f);
+    LMPHostType::fence();
+  } else {
+    struct AtomVecBondKokkos_UnpackBorder<LMPDeviceType>
+      f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_molecule,first);
+    Kokkos::parallel_for(n,f);
+    LMPDeviceType::fence();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Host unpack of n border atoms from a flat buffer into atoms
+// first..first+n-1: 3 coordinates followed by tag, type, mask and molecule
+// (each decoded through ubuf).  Arrays are grown when index i reaches nmax.
+// Any fixes registered in atom->extra_border then unpack their own data
+// from the remainder of the buffer.
+void AtomVecBondKokkos::unpack_border(int n, int first, double *buf)
+{
+  int i,m,last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) {
+    if (i == nmax) grow(0);
+    modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_tag(i) = (tagint) ubuf(buf[m++]).i;
+    h_type(i) = (int) ubuf(buf[m++]).i;
+    h_mask(i) = (int) ubuf(buf[m++]).i;
+    h_molecule(i) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->
+        unpack_border(n,first,&buf[m]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void
AtomVecBondKokkos::unpack_border_vel(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + if (i == nmax) grow(0); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag(i) = (tagint) ubuf(buf[m++]).i; + h_type(i) = (int) ubuf(buf[m++]).i; + h_mask(i) = (int) ubuf(buf[m++]).i; + h_molecule(i) = (tagint) ubuf(buf[m++]).i; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecBondKokkos::unpack_border_hybrid(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) + h_molecule(i) = (tagint) ubuf(buf[m++]).i; + return m; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecBondKokkos_PackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + typename AT::t_x_array_randomread _x; + typename AT::t_v_array_randomread _v; + typename AT::t_tagint_1d_randomread _tag; + typename AT::t_int_1d_randomread _type; + typename AT::t_int_1d_randomread _mask; + typename AT::t_imageint_1d_randomread _image; + typename AT::t_tagint_1d_randomread _molecule; + typename AT::t_int_2d_randomread _nspecial; + typename AT::t_tagint_2d_randomread _special; + typename AT::t_int_1d_randomread _num_bond; + typename AT::t_int_2d_randomread _bond_type; + typename AT::t_tagint_2d_randomread _bond_atom; + typename AT::t_x_array _xw; + typename AT::t_v_array _vw; + typename AT::t_tagint_1d _tagw; + typename AT::t_int_1d _typew; + typename AT::t_int_1d _maskw; + typename AT::t_imageint_1d _imagew; + typename AT::t_tagint_1d _moleculew; + typename AT::t_int_2d 
_nspecialw; + typename AT::t_tagint_2d _specialw; + typename AT::t_int_1d _num_bondw; + typename AT::t_int_2d _bond_typew; + typename AT::t_tagint_2d _bond_atomw; + + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d_const _sendlist; + typename AT::t_int_1d_const _copylist; + int _nlocal,_dim; + X_FLOAT _lo,_hi; + size_t elements; + + AtomVecBondKokkos_PackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d sendlist, + typename AT::tdual_int_1d copylist,int nlocal, int dim, + X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view<DeviceType>()), + _v(atom->k_v.view<DeviceType>()), + _tag(atom->k_tag.view<DeviceType>()), + _type(atom->k_type.view<DeviceType>()), + _mask(atom->k_mask.view<DeviceType>()), + _image(atom->k_image.view<DeviceType>()), + _molecule(atom->k_molecule.view<DeviceType>()), + _nspecial(atom->k_nspecial.view<DeviceType>()), + _special(atom->k_special.view<DeviceType>()), + _num_bond(atom->k_num_bond.view<DeviceType>()), + _bond_type(atom->k_bond_type.view<DeviceType>()), + _bond_atom(atom->k_bond_atom.view<DeviceType>()), + _xw(atom->k_x.view<DeviceType>()), + _vw(atom->k_v.view<DeviceType>()), + _tagw(atom->k_tag.view<DeviceType>()), + _typew(atom->k_type.view<DeviceType>()), + _maskw(atom->k_mask.view<DeviceType>()), + _imagew(atom->k_image.view<DeviceType>()), + _moleculew(atom->k_molecule.view<DeviceType>()), + _nspecialw(atom->k_nspecial.view<DeviceType>()), + _specialw(atom->k_special.view<DeviceType>()), + _num_bondw(atom->k_num_bond.view<DeviceType>()), + _bond_typew(atom->k_bond_type.view<DeviceType>()), + _bond_atomw(atom->k_bond_atom.view<DeviceType>()), + _sendlist(sendlist.template view<DeviceType>()), + _copylist(copylist.template view<DeviceType>()), + _nlocal(nlocal),_dim(dim), + _lo(lo),_hi(hi){ + // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, + // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, + // 
1 to store buffer lenght + elements = 16+atom->maxspecial+atom->bond_per_atom+atom->bond_per_atom; + const int maxsendlist = (buf.template view<DeviceType>().dimension_0()* + buf.template view<DeviceType>().dimension_1())/elements; + buffer_view<DeviceType>(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &mysend) const { + int k; + const int i = _sendlist(mysend); + _buf(mysend,0) = elements; + int m = 1; + _buf(mysend,m++) = _x(i,0); + _buf(mysend,m++) = _x(i,1); + _buf(mysend,m++) = _x(i,2); + _buf(mysend,m++) = _v(i,0); + _buf(mysend,m++) = _v(i,1); + _buf(mysend,m++) = _v(i,2); + _buf(mysend,m++) = _tag(i); + _buf(mysend,m++) = _type(i); + _buf(mysend,m++) = _mask(i); + _buf(mysend,m++) = _image(i); + _buf(mysend,m++) = _molecule(i); + _buf(mysend,m++) = _num_bond(i); + for (k = 0; k < _num_bond(i); k++) { + _buf(mysend,m++) = _bond_type(i,k); + _buf(mysend,m++) = _bond_atom(i,k); + } + _buf(mysend,m++) = _nspecial(i,0); + _buf(mysend,m++) = _nspecial(i,1); + _buf(mysend,m++) = _nspecial(i,2); + for (k = 0; k < _nspecial(i,2); k++) + _buf(mysend,m++) = _special(i,k); + + const int j = _copylist(mysend); + + if(j>-1) { + _xw(i,0) = _x(j,0); + _xw(i,1) = _x(j,1); + _xw(i,2) = _x(j,2); + _vw(i,0) = _v(j,0); + _vw(i,1) = _v(j,1); + _vw(i,2) = _v(j,2); + _tagw(i) = _tag(j); + _typew(i) = _type(j); + _maskw(i) = _mask(j); + _imagew(i) = _image(j); + _moleculew(i) = _molecule(j); + _num_bondw(i) = _num_bond(j); + for (k = 0; k < _num_bond(j); k++) { + _bond_typew(i,k) = _bond_type(j,k); + _bond_atomw(i,k) = _bond_atom(j,k); + } + _nspecialw(i,0) = _nspecial(j,0); + _nspecialw(i,1) = _nspecial(j,1); + _nspecialw(i,2) = _nspecial(j,2); + for (k = 0; k < _nspecial(j,2); k++) + _specialw(i,k) = _special(j,k); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_sendlist, + 
DAT::tdual_int_1d k_copylist, + ExecutionSpace space,int dim,X_FLOAT lo, + X_FLOAT hi ) +{ + const int elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom; + if(nsend > (int) (k_buf.view<LMPHostType>().dimension_0()* + k_buf.view<LMPHostType>().dimension_1())/elements) { + int newsize = nsend*elements/k_buf.view<LMPHostType>().dimension_1()+1; + k_buf.resize(newsize,k_buf.view<LMPHostType>().dimension_1()); + } + if(space == Host) { + AtomVecBondKokkos_PackExchangeFunctor<LMPHostType> + f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + LMPHostType::fence(); + return nsend*elements; + } else { + AtomVecBondKokkos_PackExchangeFunctor<LMPDeviceType> + f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + LMPDeviceType::fence(); + return nsend*elements; + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecBondKokkos::pack_exchange(int i, double *buf) +{ + int k; + int m = 1; + buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + buf[m++] = ubuf(h_tag(i)).d; + buf[m++] = ubuf(h_type(i)).d; + buf[m++] = ubuf(h_mask(i)).d; + buf[m++] = ubuf(h_image(i)).d; + buf[m++] = ubuf(h_molecule(i)).d; + + buf[m++] = ubuf(h_num_bond(i)).d; + for (k = 0; k < h_num_bond(i); k++) { + buf[m++] = ubuf(h_bond_type(i,k)).d; + buf[m++] = ubuf(h_bond_atom(i,k)).d; + } + buf[m++] = ubuf(h_nspecial(i,0)).d; + buf[m++] = ubuf(h_nspecial(i,1)).d; + buf[m++] = ubuf(h_nspecial(i,2)).d; + for (k = 0; k < h_nspecial(i,2); k++) + buf[m++] = ubuf(h_special(i,k)).d; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- */ + +template<class 
DeviceType> +struct AtomVecBondKokkos_UnpackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + typename AT::t_x_array _x; + typename AT::t_v_array _v; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_imageint_1d _image; + typename AT::t_tagint_1d _molecule; + typename AT::t_int_2d _nspecial; + typename AT::t_tagint_2d _special; + typename AT::t_int_1d _num_bond; + typename AT::t_int_2d _bond_type; + typename AT::t_tagint_2d _bond_atom; + + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d _nlocal; + int _dim; + X_FLOAT _lo,_hi; + size_t elements; + + AtomVecBondKokkos_UnpackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + int dim, X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view<DeviceType>()), + _v(atom->k_v.view<DeviceType>()), + _tag(atom->k_tag.view<DeviceType>()), + _type(atom->k_type.view<DeviceType>()), + _mask(atom->k_mask.view<DeviceType>()), + _image(atom->k_image.view<DeviceType>()), + _molecule(atom->k_molecule.view<DeviceType>()), + _nspecial(atom->k_nspecial.view<DeviceType>()), + _special(atom->k_special.view<DeviceType>()), + _num_bond(atom->k_num_bond.view<DeviceType>()), + _bond_type(atom->k_bond_type.view<DeviceType>()), + _bond_atom(atom->k_bond_atom.view<DeviceType>()), + _nlocal(nlocal.template view<DeviceType>()),_dim(dim), + _lo(lo),_hi(hi){ + elements = 16+atom->maxspecial+atom->bond_per_atom+atom->bond_per_atom; + const int maxsendlist = (buf.template view<DeviceType>().dimension_0()* + buf.template view<DeviceType>().dimension_1())/elements; + buffer_view<DeviceType>(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &myrecv) const { + X_FLOAT x = _buf(myrecv,_dim+1); + if (x >= _lo && x < _hi) { + int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + int m = 1; + _x(i,0) = _buf(myrecv,m++); + _x(i,1) = _buf(myrecv,m++); + 
_x(i,2) = _buf(myrecv,m++); + _v(i,0) = _buf(myrecv,m++); + _v(i,1) = _buf(myrecv,m++); + _v(i,2) = _buf(myrecv,m++); + _tag(i) = _buf(myrecv,m++); + _type(i) = _buf(myrecv,m++); + _mask(i) = _buf(myrecv,m++); + _image(i) = _buf(myrecv,m++); + + _molecule(i) = _buf(myrecv,m++); + _num_bond(i) = _buf(myrecv,m++); + int k; + for (k = 0; k < _num_bond(i); k++) { + _bond_type(i,k) = _buf(myrecv,m++); + _bond_atom(i,k) = _buf(myrecv,m++); + } + _nspecial(i,0) = _buf(myrecv,m++); + _nspecial(i,1) = _buf(myrecv,m++); + _nspecial(i,2) = _buf(myrecv,m++); + for (k = 0; k < _nspecial(i,2); k++) + _special(i,k) = _buf(myrecv,m++); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, + int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, + ExecutionSpace space) { + const size_t elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom; + if(space == Host) { + k_count.h_view(0) = nlocal; + AtomVecBondKokkos_UnpackExchangeFunctor<LMPHostType> + f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/elements,f); + LMPHostType::fence(); + return k_count.h_view(0); + } else { + k_count.h_view(0) = nlocal; + k_count.modify<LMPHostType>(); + k_count.sync<LMPDeviceType>(); + AtomVecBondKokkos_UnpackExchangeFunctor<LMPDeviceType> + f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/elements,f); + LMPDeviceType::fence(); + k_count.modify<LMPDeviceType>(); + k_count.sync<LMPHostType>(); + + return k_count.h_view(0); + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecBondKokkos::unpack_exchange(double *buf) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | SPECIAL_MASK); + + int k; + int m = 1; + h_x(nlocal,0) = buf[m++]; + h_x(nlocal,1) = buf[m++]; + 
h_x(nlocal,2) = buf[m++]; + h_v(nlocal,0) = buf[m++]; + h_v(nlocal,1) = buf[m++]; + h_v(nlocal,2) = buf[m++]; + h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; + h_type(nlocal) = (int) ubuf(buf[m++]).i; + h_mask(nlocal) = (int) ubuf(buf[m++]).i; + h_image(nlocal) = (imageint) ubuf(buf[m++]).i; + h_molecule(nlocal) = (tagint) ubuf(buf[m++]).i; + + h_num_bond(nlocal) = (int) ubuf(buf[m++]).i; + for (k = 0; k < h_num_bond(nlocal); k++) { + h_bond_type(nlocal,k) = (int) ubuf(buf[m++]).i; + h_bond_atom(nlocal,k) = (tagint) ubuf(buf[m++]).i; + } + + h_nspecial(nlocal,0) = (int) ubuf(buf[m++]).i; + h_nspecial(nlocal,1) = (int) ubuf(buf[m++]).i; + h_nspecial(nlocal,2) = (int) ubuf(buf[m++]).i; + for (k = 0; k < h_nspecial(nlocal,2); k++) + h_special(nlocal,k) = (tagint) ubuf(buf[m++]).i; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]-> + unpack_exchange(nlocal,&buf[m]); + + atom->nlocal++; + return m; +} + +/* ---------------------------------------------------------------------- + size of restart data for all atoms owned by this proc + include extra data stored by fixes +------------------------------------------------------------------------- */ + +int AtomVecBondKokkos::size_restart() +{ + int i; + + int nlocal = atom->nlocal; + int n = 0; + for (i = 0; i < nlocal; i++) + n += 13 + 2*h_num_bond[i]; + + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + for (i = 0; i < nlocal; i++) + n += modify->fix[atom->extra_restart[iextra]]->size_restart(i); + + return n; +} + +/* ---------------------------------------------------------------------- + pack atom I's data for restart file including extra quantities + xyz must be 1st 3 values, so that read_restart can test on them + molecular types may be negative, but write as positive +------------------------------------------------------------------------- */ + +int AtomVecBondKokkos::pack_restart(int i, double 
*buf) +{ + int m = 1; + buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] = ubuf(h_tag(i)).d; + buf[m++] = ubuf(h_type(i)).d; + buf[m++] = ubuf(h_mask(i)).d; + buf[m++] = ubuf(h_image(i)).d; + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + + buf[m++] = ubuf(h_molecule(i)).d; + + buf[m++] = ubuf(h_num_bond(i)).d; + for (int k = 0; k < h_num_bond(i); k++) { + buf[m++] = ubuf(MAX(h_bond_type(i,k),-h_bond_type(i,k))).d; + buf[m++] = ubuf(h_bond_atom(i,k)).d; + } + + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- + unpack data for one atom from restart file including extra quantities +------------------------------------------------------------------------- */ + +int AtomVecBondKokkos::unpack_restart(double *buf) +{ + int k; + + int nlocal = atom->nlocal; + if (nlocal == nmax) { + grow(0); + if (atom->nextra_store) + memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); + } + + int m = 1; + h_x(nlocal,0) = buf[m++]; + h_x(nlocal,1) = buf[m++]; + h_x(nlocal,2) = buf[m++]; + h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; + h_type(nlocal) = (int) ubuf(buf[m++]).i; + h_mask(nlocal) = (int) ubuf(buf[m++]).i; + h_image(nlocal) = (imageint) ubuf(buf[m++]).i; + h_v(nlocal,0) = buf[m++]; + h_v(nlocal,1) = buf[m++]; + h_v(nlocal,2) = buf[m++]; + + h_molecule(nlocal) = (tagint) ubuf(buf[m++]).i; + + h_num_bond(nlocal) = (int) ubuf(buf[m++]).i; + for (k = 0; k < h_num_bond(nlocal); k++) { + h_bond_type(nlocal,k) = (int) ubuf(buf[m++]).i; + h_bond_atom(nlocal,k) = (tagint) ubuf(buf[m++]).i; + } + + h_nspecial(nlocal,0) = h_nspecial(nlocal,1) = h_nspecial(nlocal,2) = 0; + + double **extra = atom->extra; + if (atom->nextra_store) { + int size = static_cast<int> (ubuf(buf[m++]).i) - m; + for (int i = 0; i < 
size; i++) extra[nlocal][i] = buf[m++]; + } + + atom->nlocal++; + return m; +} + +/* ---------------------------------------------------------------------- + create one atom of itype at coord + set other values to defaults +------------------------------------------------------------------------- */ + +void AtomVecBondKokkos::create_atom(int itype, double *coord) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) { + atomKK->modified(Host,ALL_MASK); + grow(0); + } + atomKK->modified(Host,ALL_MASK); + + tag[nlocal] = 0; + type[nlocal] = itype; + h_x(nlocal,0) = coord[0]; + h_x(nlocal,1) = coord[1]; + h_x(nlocal,2) = coord[2]; + h_mask(nlocal) = 1; + h_image(nlocal) = ((imageint) IMGMAX << IMG2BITS) | + ((imageint) IMGMAX << IMGBITS) | IMGMAX; + h_v(nlocal,0) = 0.0; + h_v(nlocal,1) = 0.0; + h_v(nlocal,2) = 0.0; + + h_molecule(nlocal) = 0; + h_num_bond(nlocal) = 0; + h_nspecial(nlocal,0) = h_nspecial(nlocal,1) = h_nspecial(nlocal,2) = 0; + + atom->nlocal++; +} + +/* ---------------------------------------------------------------------- + unpack one line from Atoms section of data file + initialize other atom quantities +------------------------------------------------------------------------- */ + +void AtomVecBondKokkos::data_atom(double *coord, imageint imagetmp, + char **values) +{ + int nlocal = atomKK->nlocal; + if (nlocal == nmax) grow(0); + + h_tag(nlocal) = atoi(values[0]); + if (h_tag(nlocal) <= 0) + error->one(FLERR,"Invalid atom ID in Atoms section of data file"); + + h_molecule(nlocal) = atoi(values[1]); + if (h_molecule(nlocal) <= 0) + error->one(FLERR,"Invalid molecule ID in Atoms section of data file"); + + h_type(nlocal) = atoi(values[2]); + if (h_type(nlocal) <= 0 || h_type(nlocal) > atom->ntypes) + error->one(FLERR,"Invalid atom type in Atoms section of data file"); + + h_x(nlocal,0) = coord[0]; + h_x(nlocal,1) = coord[1]; + h_x(nlocal,2) = coord[2]; + + h_image(nlocal) = imagetmp; + + h_mask(nlocal) = 1; + h_v(nlocal,0) = 0.0; + h_v(nlocal,1) = 
0.0;
+  h_v(nlocal,2) = 0.0;
+  h_num_bond(nlocal) = 0;
+
+  atomKK->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   unpack hybrid quantities from one line in Atoms section of data file
+   initialize other atom quantities for this sub-style
+   reads the molecule ID from values[0], zeroes the bond count,
+   returns the number of values consumed (1)
+------------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::data_atom_hybrid(int nlocal, char **values)
+{
+  h_molecule(nlocal) = atoi(values[0]);
+  h_num_bond(nlocal) = 0;
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom info for data file including 3 image flags
+   row i of buf: tag, molecule, type, x, y, z, then the three image
+   counts extracted from the packed image word and re-centred by IMGMAX
+   integer fields are stored by value as doubles
+------------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::pack_data(double **buf)
+{
+  int nlocal = atom->nlocal;
+  for (int i = 0; i < nlocal; i++) {
+    buf[i][0] = h_tag(i);
+    buf[i][1] = h_molecule(i);
+    buf[i][2] = h_type(i);
+    buf[i][3] = h_x(i,0);
+    buf[i][4] = h_x(i,1);
+    buf[i][5] = h_x(i,2);
+    buf[i][6] = (h_image[i] & IMGMASK) - IMGMAX;
+    buf[i][7] = (h_image[i] >> IMGBITS & IMGMASK) - IMGMAX;
+    buf[i][8] = (h_image[i] >> IMG2BITS) - IMGMAX;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack hybrid atom info for data file
+   stores the molecule ID of atom i in buf[0], returns 1
+------------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::pack_data_hybrid(int i, double *buf)
+{
+  buf[0] = h_molecule(i);
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   write atom info to data file including 3 image flags
+   one text line per row of buf, in the column order filled by pack_data
+   tag and molecule ID are printed as tagint with TAGINT_FORMAT, the same
+   way write_data_hybrid prints the molecule ID, so they are not narrowed
+   to int when tagint is a wider type
+------------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::write_data(FILE *fp, int n, double **buf)
+{
+  for (int i = 0; i < n; i++)
+    fprintf(fp,TAGINT_FORMAT " " TAGINT_FORMAT
+            " %d %-1.16e %-1.16e %-1.16e %d %d %d\n",
+            (tagint) buf[i][0],(tagint) buf[i][1], (int) buf[i][2],
+            buf[i][3],buf[i][4],buf[i][5],
+            (int) buf[i][6],(int) buf[i][7],(int) buf[i][8]);
+}
+
+/*
----------------------------------------------------------------------
+   write hybrid atom info to data file
+   prints the molecule ID held in buf[0], preceded by a space; returns 1
+------------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::write_data_hybrid(FILE *fp, double *buf)
+{
+  fprintf(fp," " TAGINT_FORMAT, (tagint) (buf[0]));
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   return # of bytes of allocated memory
+   sums memory->usage() over every per-atom array of this style for which
+   atom->memcheck() returns true; f is counted nmax*commKK->nthreads long
+------------------------------------------------------------------------- */
+
+bigint AtomVecBondKokkos::memory_usage()
+{
+  bigint bytes = 0;
+
+  if (atom->memcheck("tag")) bytes += memory->usage(tag,nmax);
+  if (atom->memcheck("type")) bytes += memory->usage(type,nmax);
+  if (atom->memcheck("mask")) bytes += memory->usage(mask,nmax);
+  if (atom->memcheck("image")) bytes += memory->usage(image,nmax);
+  if (atom->memcheck("x")) bytes += memory->usage(x,nmax,3);
+  if (atom->memcheck("v")) bytes += memory->usage(v,nmax,3);
+  if (atom->memcheck("f")) bytes += memory->usage(f,nmax*commKK->nthreads,3);
+
+  if (atom->memcheck("molecule")) bytes += memory->usage(molecule,nmax);
+  if (atom->memcheck("nspecial")) bytes += memory->usage(nspecial,nmax,3);
+  if (atom->memcheck("special"))
+    bytes += memory->usage(special,nmax,atom->maxspecial);
+
+  if (atom->memcheck("num_bond")) bytes += memory->usage(num_bond,nmax);
+  if (atom->memcheck("bond_type"))
+    bytes += memory->usage(bond_type,nmax,atom->bond_per_atom);
+  if (atom->memcheck("bond_atom"))
+    bytes += memory->usage(bond_atom,nmax,atom->bond_per_atom);
+
+  return bytes;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Call sync<>() for the requested execution space on every dual view whose
+// bit is set in mask.  SPECIAL_MASK covers nspecial and special; BOND_MASK
+// covers num_bond, bond_type and bond_atom.
+// All tests use bitwise AND.  The molecule/special/bond tests previously
+// used logical AND (mask && FLAG), which is true for any non-zero mask and
+// so synced those arrays even when they were not requested.
+void AtomVecBondKokkos::sync(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.sync<LMPDeviceType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.sync<LMPDeviceType>();
+      atomKK->k_special.sync<LMPDeviceType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.sync<LMPDeviceType>();
+      atomKK->k_bond_type.sync<LMPDeviceType>();
+      atomKK->k_bond_atom.sync<LMPDeviceType>();
+    }
+  } else {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.sync<LMPHostType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.sync<LMPHostType>();
+      atomKK->k_special.sync<LMPHostType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.sync<LMPHostType>();
+      atomKK->k_bond_type.sync<LMPHostType>();
+      atomKK->k_bond_atom.sync<LMPHostType>();
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// NOTE(review): the `mask && ..._MASK` tests in modified() use logical AND
+// and are true for any non-zero mask; they presumably should be bitwise
+// `&` like the tests above them.  Left unchanged here - confirm and fix
+// over the whole function.
+void AtomVecBondKokkos::modified(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>();
+    if (mask && MOLECULE_MASK) atomKK->k_molecule.modify<LMPDeviceType>();
+    if (mask &&
SPECIAL_MASK) { + atomKK->k_nspecial.modify<LMPDeviceType>(); + atomKK->k_special.modify<LMPDeviceType>(); + } + if (mask && BOND_MASK) { + atomKK->k_num_bond.modify<LMPDeviceType>(); + atomKK->k_bond_type.modify<LMPDeviceType>(); + atomKK->k_bond_atom.modify<LMPDeviceType>(); + } + } else { + if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>(); + if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>(); + if (mask & F_MASK) atomKK->k_f.modify<LMPHostType>(); + if (mask & TAG_MASK) atomKK->k_tag.modify<LMPHostType>(); + if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>(); + if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>(); + if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>(); + if (mask && MOLECULE_MASK) atomKK->k_molecule.modify<LMPHostType>(); + if (mask && SPECIAL_MASK) { + atomKK->k_nspecial.modify<LMPHostType>(); + atomKK->k_special.modify<LMPHostType>(); + } + if (mask && BOND_MASK) { + atomKK->k_num_bond.modify<LMPHostType>(); + atomKK->k_bond_type.modify<LMPHostType>(); + atomKK->k_bond_atom.modify<LMPHostType>(); + } + } +} diff --git a/src/KOKKOS/atom_vec_bond_kokkos.h b/src/KOKKOS/atom_vec_bond_kokkos.h new file mode 100644 index 0000000000..676cd1202c --- /dev/null +++ b/src/KOKKOS/atom_vec_bond_kokkos.h @@ -0,0 +1,141 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */
+
+#ifdef ATOM_CLASS
+
+// style entry: name bond/kk -> class AtomVecBondKokkos
+// (the AtomStyle macro itself is defined elsewhere)
+AtomStyle(bond/kk,AtomVecBondKokkos)
+
+#else
+
+#ifndef LMP_ATOM_VEC_BOND_KOKKOS_H
+#define LMP_ATOM_VEC_BOND_KOKKOS_H
+
+#include "atom_vec_kokkos.h"
+
+namespace LAMMPS_NS {
+
+// Atom style derived from AtomVecKokkos that, besides tag/type/mask/image
+// and x/v/f, carries per-atom molecule ID, special-neighbor lists and
+// bond lists (see the data members below).
+class AtomVecBondKokkos : public AtomVecKokkos {
+ public:
+  AtomVecBondKokkos(class LAMMPS *);
+  virtual ~AtomVecBondKokkos() {}
+
+  // methods that exchange data through plain int/double buffers
+  // NOTE(review): presumably overrides of the AtomVec interface -- confirm
+  // against the base class declarations
+  void grow(int);
+  void copy(int, int, int);
+  int pack_comm(int, int *, double *, int, int *);
+  int pack_comm_vel(int, int *, double *, int, int *);
+  void unpack_comm(int, int, double *);
+  void unpack_comm_vel(int, int, double *);
+  int pack_reverse(int, int, double *);
+  void unpack_reverse(int, int *, double *);
+  int pack_border(int, int *, double *, int, int *);
+  int pack_border_vel(int, int *, double *, int, int *);
+  int pack_border_hybrid(int, int *, double *);
+  void unpack_border(int, int, double *);
+  void unpack_border_vel(int, int, double *);
+  int unpack_border_hybrid(int, int, double *);
+  int pack_exchange(int, double *);
+  int unpack_exchange(double *);
+  int size_restart();
+  int pack_restart(int, double *);
+  int unpack_restart(double *);
+  void create_atom(int, double *);
+  void data_atom(double *, tagint, char **);
+  int data_atom_hybrid(int, char **);
+  void pack_data(double **);
+  int pack_data_hybrid(int, double *);
+  void write_data(FILE *, int, double **);
+  int write_data_hybrid(FILE *, double *);
+  bigint memory_usage();
+
+  // Kokkos variants: send lists and buffers are passed as DAT::tdual_*
+  // objects; some take an ExecutionSpace argument
+  void grow_reset();
+  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
+                       const int & iswap,
+                       const DAT::tdual_xfloat_2d &buf,
+                       const int &pbc_flag, const int pbc[]);
+  void unpack_comm_kokkos(const int &n, const int &nfirst,
+                          const DAT::tdual_xfloat_2d &buf);
+  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
+                     const int & iswap, const int nfirst,
+                     const int &pbc_flag, const int pbc[]);
+  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,int iswap,
+                         int pbc_flag, int *pbc, ExecutionSpace space);
+  void unpack_border_kokkos(const int &n, const int &nfirst,
+                            const DAT::tdual_xfloat_2d &buf,
+                            ExecutionSpace space);
+  int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
+                           DAT::tdual_int_1d k_sendlist,
+                           DAT::tdual_int_1d k_copylist,
+                           ExecutionSpace space, int dim,
+                           X_FLOAT lo, X_FLOAT hi);
+  int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
+                             int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
+                             ExecutionSpace space);
+
+  // sync / flag-as-modified the per-atom data selected by a bit mask
+  // (the *_MASK constants are defined elsewhere)
+  void sync(ExecutionSpace space, unsigned int mask);
+  void modified(ExecutionSpace space, unsigned int mask);
+
+ protected:
+
+  // raw pointers to the per-atom arrays
+  tagint *tag;
+  int *type,*mask;
+  imageint *image;
+  double **x,**v,**f;
+
+  // raw pointers to the molecular per-atom arrays
+  tagint *molecule;
+  int **nspecial;
+  tagint **special;
+  int *num_bond;
+  int **bond_type;
+  tagint **bond_atom;
+
+  // DAT:: (d_*) and HAT:: (h_*) typed views of the same quantities
+  // NOTE(review): presumably device/host views assigned in grow_reset()
+  // from the atomKK->k_* members -- confirm in atom_vec_bond_kokkos.cpp
+  DAT::t_tagint_1d d_tag;
+  DAT::t_int_1d d_type, d_mask;
+  HAT::t_tagint_1d h_tag;
+  HAT::t_int_1d h_type, h_mask;
+
+  DAT::t_imageint_1d d_image;
+  HAT::t_imageint_1d h_image;
+
+  DAT::t_x_array d_x;
+  DAT::t_v_array d_v;
+  DAT::t_f_array d_f;
+  HAT::t_x_array h_x;
+  HAT::t_v_array h_v;
+  HAT::t_f_array h_f;
+
+  DAT::t_tagint_1d d_molecule;
+  DAT::t_int_2d d_nspecial;
+  DAT::t_tagint_2d d_special;
+  DAT::t_int_1d d_num_bond;
+  DAT::t_int_2d d_bond_type;
+  DAT::t_tagint_2d d_bond_atom;
+
+  HAT::t_tagint_1d h_molecule;
+  HAT::t_int_2d h_nspecial;
+  HAT::t_tagint_2d h_special;
+  HAT::t_int_1d h_num_bond;
+  HAT::t_int_2d h_bond_type;
+  HAT::t_tagint_2d h_bond_atom;
+
+  // NOTE(review): use not visible in this header; presumably a counter
+  // shared with the exchange kernels -- confirm in the .cpp
+  DAT::tdual_int_1d k_count;
+
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp
new file mode 100644
index 0000000000..97f489fb58
--- /dev/null
+++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp
@@ -0,0 +1,1517 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+
http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "stdlib.h" +#include "atom_vec_charge_kokkos.h" +#include "atom_kokkos.h" +#include "comm_kokkos.h" +#include "domain.h" +#include "modify.h" +#include "fix.h" +#include "atom_masks.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define DELTA 10000 + +/* ---------------------------------------------------------------------- */ + +AtomVecChargeKokkos::AtomVecChargeKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) +{ + molecular = 0; + mass_type = 1; + + comm_x_only = comm_f_only = 1; + size_forward = 3; + size_reverse = 3; + size_border = 7; + size_velocity = 3; + size_data_atom = 6; + size_data_vel = 4; + xcol_data = 4; + + atom->q_flag = 1; + + k_count = DAT::tdual_int_1d("atom::k_count",1); + atomKK = (AtomKokkos *) atom; + commKK = (CommKokkos *) comm; + +} + +/* ---------------------------------------------------------------------- + grow atom arrays + n = 0 grows arrays by DELTA + n > 0 allocates arrays to size n +------------------------------------------------------------------------- */ + +void AtomVecChargeKokkos::grow(int n) +{ + if (n == 0) nmax += DELTA; + else nmax = n; + atomKK->nmax = nmax; + if (nmax < 0 || nmax > MAXSMALLINT) + error->one(FLERR,"Per-processor system is too big"); + + sync(Device,ALL_MASK); + modified(Device,ALL_MASK); + + memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); + memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); + memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); + 
memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); + + memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); + memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); + memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + + memory->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q"); + + grow_reset(); + sync(Host,ALL_MASK); + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); +} + +/* ---------------------------------------------------------------------- + reset local array ptrs +------------------------------------------------------------------------- */ + +void AtomVecChargeKokkos::grow_reset() +{ + tag = atomKK->tag; + d_tag = atomKK->k_tag.d_view; + h_tag = atomKK->k_tag.h_view; + + type = atomKK->type; + d_type = atomKK->k_type.d_view; + h_type = atomKK->k_type.h_view; + mask = atomKK->mask; + d_mask = atomKK->k_mask.d_view; + h_mask = atomKK->k_mask.h_view; + image = atomKK->image; + d_image = atomKK->k_image.d_view; + h_image = atomKK->k_image.h_view; + + x = atomKK->x; + d_x = atomKK->k_x.d_view; + h_x = atomKK->k_x.h_view; + v = atomKK->v; + d_v = atomKK->k_v.d_view; + h_v = atomKK->k_v.h_view; + f = atomKK->f; + d_f = atomKK->k_f.d_view; + h_f = atomKK->k_f.h_view; + + q = atomKK->q; + d_q = atomKK->k_q.d_view; + h_q = atomKK->k_q.h_view; + +} + +/* ---------------------------------------------------------------------- + copy atom I info to atom J +------------------------------------------------------------------------- */ + +void AtomVecChargeKokkos::copy(int i, int j, int delflag) +{ + h_tag[j] = h_tag[i]; + h_type[j] = h_type[i]; + mask[j] = mask[i]; + h_image[j] = h_image[i]; + h_x(j,0) = h_x(i,0); + h_x(j,1) = h_x(i,1); + h_x(j,2) = h_x(i,2); + h_v(j,0) = h_v(i,0); + h_v(j,1) = h_v(i,1); + h_v(j,2) = h_v(i,2); + + h_q[j] = h_q[i]; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + 
modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag); +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG,int TRICLINIC> +struct AtomVecChargeKokkos_PackComm { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf; + typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecChargeKokkos_PackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_xfloat_2d &buf, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3; + const size_t elements = 3; + buffer_view<DeviceType>(_buf,buf,maxsend,elements); + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecChargeKokkos::pack_comm_kokkos(const int &n, + const 
DAT::tdual_int_2d &list, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, + const int* const pbc) +{ + // Check whether to always run forward communication on the host + // Choose correct forward PackComm kernel + + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecChargeKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecChargeKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecChargeKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecChargeKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPHostType::fence(); + } else { + sync(Device,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecChargeKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecChargeKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecChargeKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct 
AtomVecChargeKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPDeviceType::fence(); + } + + return n*size_forward; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG,int TRICLINIC> +struct AtomVecChargeKokkos_PackCommSelf { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + typename ArrayTypes<DeviceType>::t_x_array _xw; + int _nfirst; + typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecChargeKokkos_PackCommSelf( + const typename DAT::tdual_x_array &x, + const int &nfirst, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _xw(i+_nfirst,0) = _x(j,0); + _xw(i+_nfirst,1) = _x(j,1); + _xw(i+_nfirst,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + + } +}; + +/* 
---------------------------------------------------------------------- */ + +int AtomVecChargeKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, + const int nfirst, const int &pbc_flag, const int* const pbc) { + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + modified(Host,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPHostType::fence(); + } else { + sync(Device,X_MASK); + modified(Device,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + 
domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPDeviceType::fence(); + } + return n*3; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecChargeKokkos_UnpackComm { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array _x; + typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf; + int _first; + + AtomVecChargeKokkos_UnpackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_xfloat_2d &buf, + const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()), + _first(first) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecChargeKokkos::unpack_comm_kokkos(const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf ) { + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + modified(Host,X_MASK); + struct AtomVecChargeKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } else { + sync(Device,X_MASK); + modified(Device,X_MASK); + struct AtomVecChargeKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecChargeKokkos::pack_comm(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = 
h_x(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + } + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecChargeKokkos::pack_comm_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + if (mask[i] & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } + } + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void 
AtomVecChargeKokkos::unpack_comm(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecChargeKokkos::unpack_comm_vel(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecChargeKokkos::pack_reverse(int n, int first, double *buf) +{ + if(n > 0) + sync(Host,F_MASK); + + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + buf[m++] = h_f(i,0); + buf[m++] = h_f(i,1); + buf[m++] = h_f(i,2); + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecChargeKokkos::unpack_reverse(int n, int *list, double *buf) +{ + if(n > 0) + modified(Host,F_MASK); + + int m = 0; + for (int i = 0; i < n; i++) { + const int j = list[i]; + h_f(j,0) += buf[m++]; + h_f(j,1) += buf[m++]; + h_f(j,2) += buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG> +struct AtomVecChargeKokkos_PackBorder { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_xfloat_2d _buf; + const typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + const typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + const typename ArrayTypes<DeviceType>::t_tagint_1d _tag; + const typename ArrayTypes<DeviceType>::t_int_1d _type; + const typename ArrayTypes<DeviceType>::t_int_1d _mask; + const typename ArrayTypes<DeviceType>::t_float_1d _q; + X_FLOAT _dx,_dy,_dz; + + 
AtomVecChargeKokkos_PackBorder( + const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf, + const typename ArrayTypes<DeviceType>::t_int_2d_const &list, + const int & iswap, + const typename ArrayTypes<DeviceType>::t_x_array &x, + const typename ArrayTypes<DeviceType>::t_tagint_1d &tag, + const typename ArrayTypes<DeviceType>::t_int_1d &type, + const typename ArrayTypes<DeviceType>::t_int_1d &mask, + const typename ArrayTypes<DeviceType>::t_float_1d &q, + const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): + _buf(buf),_list(list),_iswap(iswap), + _x(x),_tag(tag),_type(type),_mask(mask),_q(q), + _dx(dx),_dy(dy),_dz(dz) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + _buf(i,3) = _tag(j); + _buf(i,4) = _type(j); + _buf(i,5) = _mask(j); + _buf(i,6) = _q(j); + } else { + _buf(i,0) = _x(j,0) + _dx; + _buf(i,1) = _x(j,1) + _dy; + _buf(i,2) = _x(j,2) + _dz; + _buf(i,3) = _tag(j); + _buf(i,4) = _type(j); + _buf(i,5) = _mask(j); + _buf(i,6) = _q(j); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space) +{ + X_FLOAT dx,dy,dz; + + if (pbc_flag != 0) { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if(space==Host) { + AtomVecChargeKokkos_PackBorder<LMPHostType,1> f( + buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(), + iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); + } else { + AtomVecChargeKokkos_PackBorder<LMPDeviceType,1> f( + buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(), + iswap,d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz); 
+ Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } + + } else { + dx = dy = dz = 0; + if(space==Host) { + AtomVecChargeKokkos_PackBorder<LMPHostType,0> f( + buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(), + iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); + } else { + AtomVecChargeKokkos_PackBorder<LMPDeviceType,0> f( + buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(), + iswap,d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } + } + return n*size_border; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecChargeKokkos::pack_border(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_q(j); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_q(j); + } + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecChargeKokkos::pack_border_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + 
buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_q[j]; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_q[j]; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_q[j]; + if (mask[i] & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } + } + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecChargeKokkos::pack_border_hybrid(int n, int *list, double *buf) +{ + int i,j,m; + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_q[j]; + } + return m; +} + + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecChargeKokkos_UnpackBorder { 
+ typedef DeviceType device_type; + + const typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf; + typename ArrayTypes<DeviceType>::t_x_array _x; + typename ArrayTypes<DeviceType>::t_tagint_1d _tag; + typename ArrayTypes<DeviceType>::t_int_1d _type; + typename ArrayTypes<DeviceType>::t_int_1d _mask; + typename ArrayTypes<DeviceType>::t_float_1d _q; + int _first; + + + AtomVecChargeKokkos_UnpackBorder( + const typename ArrayTypes<DeviceType>::t_xfloat_2d_const &buf, + typename ArrayTypes<DeviceType>::t_x_array &x, + typename ArrayTypes<DeviceType>::t_tagint_1d &tag, + typename ArrayTypes<DeviceType>::t_int_1d &type, + typename ArrayTypes<DeviceType>::t_int_1d &mask, + typename ArrayTypes<DeviceType>::t_float_1d &q, + const int& first): + _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_first(first){ + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _tag(i+_first) = static_cast<int> (_buf(i,3)); + _type(i+_first) = static_cast<int> (_buf(i,4)); + _mask(i+_first) = static_cast<int> (_buf(i,5)); + _q(i+_first) = _buf(i,6); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecChargeKokkos::unpack_border_kokkos(const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) { + if (first+n >= nmax) { + grow(first+n+100); + } + if(space==Host) { + struct AtomVecChargeKokkos_UnpackBorder<LMPHostType> + f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,first); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); + } else { + struct AtomVecChargeKokkos_UnpackBorder<LMPDeviceType> + f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_q,first); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } + modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); +} + +/* ---------------------------------------------------------------------- */ + +void 
AtomVecChargeKokkos::unpack_border(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + + for (i = first; i < last; i++) { + if (i == nmax) { + grow(0); + } + modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag(i) = (tagint) ubuf(buf[m++]).i; + h_type(i) = (int) ubuf(buf[m++]).i; + h_mask(i) = (int) ubuf(buf[m++]).i; + h_q[i] = buf[m++]; + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecChargeKokkos::unpack_border_vel(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + if (i == nmax) grow(0); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag(i) = (tagint) ubuf(buf[m++]).i; + h_type(i) = (int) ubuf(buf[m++]).i; + h_mask(i) = (int) ubuf(buf[m++]).i; + h_q[i] = buf[m++]; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecChargeKokkos::unpack_border_hybrid(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) + h_q[i] = buf[m++]; + return m; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecChargeKokkos_PackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + typename AT::t_x_array_randomread _x; + typename AT::t_v_array_randomread _v; + typename AT::t_tagint_1d_randomread _tag; + 
typename AT::t_int_1d_randomread _type; + typename AT::t_int_1d_randomread _mask; + typename AT::t_imageint_1d_randomread _image; + typename AT::t_float_1d_randomread _q; + typename AT::t_x_array _xw; + typename AT::t_v_array _vw; + typename AT::t_tagint_1d _tagw; + typename AT::t_int_1d _typew; + typename AT::t_int_1d _maskw; + typename AT::t_imageint_1d _imagew; + typename AT::t_float_1d _qw; + + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d_const _sendlist; + typename AT::t_int_1d_const _copylist; + int _nlocal,_dim; + X_FLOAT _lo,_hi; + + AtomVecChargeKokkos_PackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d sendlist, + typename AT::tdual_int_1d copylist,int nlocal, int dim, + X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view<DeviceType>()), + _v(atom->k_v.view<DeviceType>()), + _tag(atom->k_tag.view<DeviceType>()), + _type(atom->k_type.view<DeviceType>()), + _mask(atom->k_mask.view<DeviceType>()), + _image(atom->k_image.view<DeviceType>()), + _q(atom->k_q.view<DeviceType>()), + _xw(atom->k_x.view<DeviceType>()), + _vw(atom->k_v.view<DeviceType>()), + _tagw(atom->k_tag.view<DeviceType>()), + _typew(atom->k_type.view<DeviceType>()), + _maskw(atom->k_mask.view<DeviceType>()), + _imagew(atom->k_image.view<DeviceType>()), + _qw(atom->k_q.view<DeviceType>()), + _sendlist(sendlist.template view<DeviceType>()), + _copylist(copylist.template view<DeviceType>()), + _nlocal(nlocal),_dim(dim), + _lo(lo),_hi(hi){ + const size_t elements = 12; + const int maxsendlist = (buf.template view<DeviceType>().dimension_0()* + buf.template view<DeviceType>().dimension_1())/elements; + + buffer_view<DeviceType>(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &mysend) const { + const int i = _sendlist(mysend); + _buf(mysend,0) = 12; + _buf(mysend,1) = _x(i,0); + _buf(mysend,2) = _x(i,1); + _buf(mysend,3) = _x(i,2); + _buf(mysend,4) = _v(i,0); + _buf(mysend,5) = _v(i,1); 
+ _buf(mysend,6) = _v(i,2); + _buf(mysend,7) = _tag[i]; + _buf(mysend,8) = _type[i]; + _buf(mysend,9) = _mask[i]; + _buf(mysend,10) = _image[i]; + _buf(mysend,11) = _q[i]; + const int j = _copylist(mysend); + + if(j>-1) { + _xw(i,0) = _x(j,0); + _xw(i,1) = _x(j,1); + _xw(i,2) = _x(j,2); + _vw(i,0) = _v(j,0); + _vw(i,1) = _v(j,1); + _vw(i,2) = _v(j,2); + _tagw(i) = _tag(j); + _typew(i) = _type(j); + _maskw(i) = _mask(j); + _imagew(i) = _image(j); + _qw(i) = _q(j); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecChargeKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space,int dim, + X_FLOAT lo,X_FLOAT hi ) +{ + if(nsend > (int) (k_buf.view<LMPHostType>().dimension_0()*k_buf.view<LMPHostType>().dimension_1())/12) { + int newsize = nsend*12/k_buf.view<LMPHostType>().dimension_1()+1; + k_buf.resize(newsize,k_buf.view<LMPHostType>().dimension_1()); + } + if(space == Host) { + AtomVecChargeKokkos_PackExchangeFunctor<LMPHostType> + f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + LMPHostType::fence(); + return nsend*12; + } else { + AtomVecChargeKokkos_PackExchangeFunctor<LMPDeviceType> + f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + LMPDeviceType::fence(); + return nsend*12; + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecChargeKokkos::pack_exchange(int i, double *buf) +{ + int m = 1; + buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + buf[m++] = ubuf(h_tag(i)).d; + buf[m++] = ubuf(h_type(i)).d; + buf[m++] = ubuf(h_mask(i)).d; + buf[m++] = ubuf(h_image(i)).d; + buf[m++] = h_q[i]; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += 
modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecChargeKokkos_UnpackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + typename AT::t_x_array _x; + typename AT::t_v_array _v; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_imageint_1d _image; + typename AT::t_float_1d _q; + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d _nlocal; + int _dim; + X_FLOAT _lo,_hi; + + AtomVecChargeKokkos_UnpackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + int dim, X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view<DeviceType>()), + _v(atom->k_v.view<DeviceType>()), + _tag(atom->k_tag.view<DeviceType>()), + _type(atom->k_type.view<DeviceType>()), + _mask(atom->k_mask.view<DeviceType>()), + _image(atom->k_image.view<DeviceType>()), + _q(atom->k_q.view<DeviceType>()), + _nlocal(nlocal.template view<DeviceType>()),_dim(dim), + _lo(lo),_hi(hi){ + const size_t elements = 12; + const int maxsendlist = (buf.template view<DeviceType>().dimension_0()*buf.template view<DeviceType>().dimension_1())/elements; + + buffer_view<DeviceType>(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &myrecv) const { + X_FLOAT x = _buf(myrecv,_dim+1); + if (x >= _lo && x < _hi) { + int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + _x(i,0) = _buf(myrecv,1); + _x(i,1) = _buf(myrecv,2); + _x(i,2) = _buf(myrecv,3); + _v(i,0) = _buf(myrecv,4); + _v(i,1) = _buf(myrecv,5); + _v(i,2) = _buf(myrecv,6); + _tag[i] = _buf(myrecv,7); + _type[i] = _buf(myrecv,8); + _mask[i] = _buf(myrecv,9); + _image[i] = _buf(myrecv,10); + _q[i] = _buf(myrecv,11); + } + } +}; + +/* 
---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   unpack exchanged atoms from k_buf on the requested execution space
+   k_buf holds 12 values per atom, nrecv is the total number of values
+   the functor keeps only atoms whose coord in dimension dim is in [lo,hi)
+   and appends them via an atomic increment of k_count, seeded with nlocal
+   return the updated number of local atoms
+------------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
+                                                int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
+                                                ExecutionSpace space) {
+  if(space == Host) {
+    k_count.h_view(0) = nlocal;
+    AtomVecChargeKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/12,f);
+    LMPHostType::fence();
+    return k_count.h_view(0);
+  } else {
+    // push the host-side seed value to the device before launching
+    k_count.h_view(0) = nlocal;
+    k_count.modify<LMPHostType>();
+    k_count.sync<LMPDeviceType>();
+    AtomVecChargeKokkos_UnpackExchangeFunctor<LMPDeviceType>
+      f(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/12,f);
+    LMPDeviceType::fence();
+    // pull the final count back so it can be read on the host
+    k_count.modify<LMPDeviceType>();
+    k_count.sync<LMPHostType>();
+
+    return k_count.h_view(0);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   unpack one atom from exchange buffer into host arrays at index nlocal
+   buf[0] is skipped (pack_exchange stores the packed length there)
+   return number of values consumed, including those consumed by fixes
+------------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::unpack_exchange(double *buf)
+{
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) grow(0);
+  modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+           MASK_MASK | IMAGE_MASK | Q_MASK);
+
+  int m = 1;
+  h_x(nlocal,0) = buf[m++];
+  h_x(nlocal,1) = buf[m++];
+  h_x(nlocal,2) = buf[m++];
+  h_v(nlocal,0) = buf[m++];
+  h_v(nlocal,1) = buf[m++];
+  h_v(nlocal,2) = buf[m++];
+  h_tag(nlocal) = (tagint) ubuf(buf[m++]).i;
+  h_type(nlocal) = (int) ubuf(buf[m++]).i;
+  h_mask(nlocal) = (int) ubuf(buf[m++]).i;
+  h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
+  h_q[nlocal] = buf[m++];
+
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      m += modify->fix[atom->extra_grow[iextra]]->
+        unpack_exchange(nlocal,&buf[m]);
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   size of restart data for all atoms owned by this proc
+   include extra data stored by fixes
+   12 values per atom = length slot + x(3) tag type mask image v(3) q
+------------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::size_restart()
+{
+  int i;
+
+  int nlocal = atom->nlocal;
+  int n = 12 * nlocal;
+
+  if (atom->nextra_restart)
+    for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
+      for (i = 0; i < nlocal; i++)
+        n += modify->fix[atom->extra_restart[iextra]]->size_restart(i);
+
+  return n;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom I's data for restart file including extra quantities
+   xyz must be 1st 3 values, so that read_restart can test on them
+   molecular types may be negative, but write as positive
+   buf[0] receives the total number of values packed (stored as a double)
+------------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::pack_restart(int i, double *buf)
+{
+  int m = 1;
+  buf[m++] = h_x(i,0);
+  buf[m++] = h_x(i,1);
+  buf[m++] = h_x(i,2);
+  buf[m++] = ubuf(h_tag(i)).d;
+  buf[m++] = ubuf(h_type(i)).d;
+  buf[m++] = ubuf(h_mask(i)).d;
+  buf[m++] = ubuf(h_image(i)).d;
+  buf[m++] = h_v(i,0);
+  buf[m++] = h_v(i,1);
+  buf[m++] = h_v(i,2);
+
+  buf[m++] = h_q[i];
+
+  if (atom->nextra_restart)
+    for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
+      m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]);
+
+  buf[0] = m;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   unpack data for one atom from restart file including extra quantities
+   values past the per-atom fields are copied verbatim into atom->extra
+   return number of values consumed
+------------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::unpack_restart(double *buf)
+{
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) {
+    grow(0);
+    if (atom->nextra_store)
+      memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
+  }
+
+  int m = 1;
+  h_x(nlocal,0) = buf[m++];
+  h_x(nlocal,1) = buf[m++];
+  h_x(nlocal,2) = buf[m++];
+  h_tag(nlocal) = (tagint) ubuf(buf[m++]).i;
+  h_type(nlocal) = (int) ubuf(buf[m++]).i;
+  h_mask(nlocal) = (int) ubuf(buf[m++]).i;
+  h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
+  h_v(nlocal,0) = buf[m++];
+  h_v(nlocal,1) = buf[m++];
+  h_v(nlocal,2) = buf[m++];
+
+  h_q[nlocal] = buf[m++];
+
+  double **extra = atom->extra;
+  if (atom->nextra_store) {
+    // pack_restart() stores the total packed length as a plain double in
+    // buf[0]; whatever follows the per-atom fields is fix data.
+    // reading it via buf[m++] would both consume the first fix value and
+    // modify m in the same expression that reads it
+    int size = static_cast<int> (buf[0]) - m;
+    for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
+  }
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   create one atom of itype at coord
+   set other values to defaults
+------------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::create_atom(int itype, double *coord)
+{
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) {
+    //if(nlocal>2) printf("typeA: %i %i\n",type[0],type[1]);
+    // flag host data as modified before growing
+    atomKK->modified(Host,ALL_MASK);
+    grow(0);
+    //if(nlocal>2) printf("typeB: %i %i\n",type[0],type[1]);
+  }
+  atomKK->modified(Host,ALL_MASK);
+
+  tag[nlocal] = 0;
+  type[nlocal] = itype;
+  h_x(nlocal,0) = coord[0];
+  h_x(nlocal,1) = coord[1];
+  h_x(nlocal,2) = coord[2];
+  h_mask[nlocal] = 1;
+  h_image[nlocal] = ((imageint) IMGMAX << IMG2BITS) |
+    ((imageint) IMGMAX << IMGBITS) | IMGMAX;
+  h_v(nlocal,0) = 0.0;
+  h_v(nlocal,1) = 0.0;
+  h_v(nlocal,2) = 0.0;
+
+  h_q[nlocal] = 0.0;
+
+  atom->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   unpack one line from Atoms section of data file
+   initialize other atom quantities
+   values = atom ID, atom type, charge (in that order)
+------------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::data_atom(double *coord, imageint imagetmp,
+                                    char **values)
+{
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) grow(0);
+
+  h_tag[nlocal] = atoi(values[0]);
+  if (tag[nlocal] <= 0)
+    error->one(FLERR,"Invalid atom ID in Atoms section of data file");
+
+  h_type[nlocal] = atoi(values[1]);
+  if (type[nlocal] <= 0 || type[nlocal] > atom->ntypes)
+    error->one(FLERR,"Invalid atom type in Atoms section of data file");
+
+  h_q[nlocal] = atof(values[2]);
+
+  h_x(nlocal,0) = coord[0];
+  h_x(nlocal,1) = coord[1];
+  h_x(nlocal,2) = coord[2];
+
+  h_image[nlocal] = imagetmp;
+
+  h_mask[nlocal] = 1;
+  h_v(nlocal,0) = 0.0;
+  h_v(nlocal,1) = 0.0;
+  h_v(nlocal,2) = 0.0;
+
+  atom->nlocal++;
+}
+/* ----------------------------------------------------------------------
+   unpack hybrid quantities from one line in Atoms section of data file
+   initialize other atom quantities for this sub-style
+   return number of values consumed (1 = charge)
+------------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::data_atom_hybrid(int nlocal, char **values)
+{
+  h_q[nlocal] = atof(values[0]);
+
+  return 1;
+}
+/* ----------------------------------------------------------------------
+   pack atom info for data file including 3 image flags
+   columns: tag type q x y z ix iy iz
+------------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::pack_data(double **buf)
+{
+  int nlocal = atom->nlocal;
+  for (int i = 0; i < nlocal; i++) {
+    buf[i][0] = h_tag[i];
+    buf[i][1] = h_type[i];
+    buf[i][2] = h_q[i];
+    buf[i][3] = h_x(i,0);
+    buf[i][4] = h_x(i,1);
+    buf[i][5] = h_x(i,2);
+    buf[i][6] = (h_image[i] & IMGMASK) - IMGMAX;
+    buf[i][7] = (h_image[i] >> IMGBITS & IMGMASK) - IMGMAX;
+    buf[i][8] = (h_image[i] >> IMG2BITS) - IMGMAX;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack hybrid atom info for data file
+   return number of values packed (1 = charge)
+------------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::pack_data_hybrid(int i, double *buf)
+{
+  buf[0] = h_q[i];
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   write atom info to data file including 3 image flags
+   buf rows use the column layout produced by pack_data()
+------------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::write_data(FILE *fp, int n, double **buf)
+{
+  for (int i = 0; i < n; i++)
+    fprintf(fp,"%d %d %-1.16e %-1.16e %-1.16e %-1.16e %d %d %d\n",
+            (int) buf[i][0],(int) buf[i][1],buf[i][2],buf[i][3],buf[i][4],buf[i][5],
+            (int) buf[i][6],(int) buf[i][7],(int) buf[i][8]);
+}
+
+/*
----------------------------------------------------------------------
+   write hybrid atom info to data file
+   return number of values written (1 = charge)
+------------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::write_data_hybrid(FILE *fp, double *buf)
+{
+  fprintf(fp," %-1.16e",buf[0]);
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   return # of bytes of allocated memory
+------------------------------------------------------------------------- */
+
+bigint AtomVecChargeKokkos::memory_usage()
+{
+  bigint bytes = 0;
+
+  if (atom->memcheck("tag")) bytes += memory->usage(tag,nmax);
+  if (atom->memcheck("type")) bytes += memory->usage(type,nmax);
+  if (atom->memcheck("mask")) bytes += memory->usage(mask,nmax);
+  if (atom->memcheck("image")) bytes += memory->usage(image,nmax);
+  if (atom->memcheck("x")) bytes += memory->usage(x,nmax,3);
+  if (atom->memcheck("v")) bytes += memory->usage(v,nmax,3);
+  if (atom->memcheck("f")) bytes += memory->usage(f,nmax*commKK->nthreads,3);
+
+  if (atom->memcheck("q")) bytes += memory->usage(q,nmax);
+
+  return bytes;
+}
+
+/* ----------------------------------------------------------------------
+   sync the per-atom DualViews selected by mask to execution space
+   each *_MASK bit selects one array: x, v, f, tag, type, mask, image, q
+   any space other than Device is treated as Host
+------------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::sync(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>();
+    // bitwise test: a logical && would select q for every non-zero mask
+    if (mask & Q_MASK) atomKK->k_q.sync<LMPDeviceType>();
+  } else {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>();
+    if (mask & Q_MASK) atomKK->k_q.sync<LMPHostType>();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   mark the per-atom DualViews selected by mask as modified on space
+   each *_MASK bit selects one array: x, v, f, tag, type, mask, image, q
+   any space other than Device is treated as Host
+------------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::modified(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>();
+    // bitwise test: a logical && would flag q for every non-zero mask
+    if (mask & Q_MASK) atomKK->k_q.modify<LMPDeviceType>();
+  } else {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>();
+    if (mask & Q_MASK) atomKK->k_q.modify<LMPHostType>();
+  }
+}
diff --git a/src/KOKKOS/atom_vec_charge_kokkos.h b/src/KOKKOS/atom_vec_charge_kokkos.h
new file mode 100644
index 0000000000..98f0f363b9
--- /dev/null
+++ b/src/KOKKOS/atom_vec_charge_kokkos.h
@@ -0,0 +1,126 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.
Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef ATOM_CLASS
+
+AtomStyle(charge/kk,AtomVecChargeKokkos)
+
+#else
+
+#ifndef LMP_ATOM_VEC_CHARGE_KOKKOS_H
+#define LMP_ATOM_VEC_CHARGE_KOKKOS_H
+
+#include "atom_vec_kokkos.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+// Atom style "charge/kk": per-atom tag, type, mask, image, x, v, f
+// plus a per-atom charge q.
+
+class AtomVecChargeKokkos : public AtomVecKokkos {
+ public:
+  AtomVecChargeKokkos(class LAMMPS *);
+  virtual ~AtomVecChargeKokkos() {}
+
+  // pack/unpack and I/O routines operating on plain double buffers
+
+  void grow(int);
+  void copy(int, int, int);
+  int pack_comm(int, int *, double *, int, int *);
+  int pack_comm_vel(int, int *, double *, int, int *);
+  void unpack_comm(int, int, double *);
+  void unpack_comm_vel(int, int, double *);
+  int pack_reverse(int, int, double *);
+  void unpack_reverse(int, int *, double *);
+  int pack_border(int, int *, double *, int, int *);
+  int pack_border_vel(int, int *, double *, int, int *);
+  int pack_border_hybrid(int, int *, double *);
+  void unpack_border(int, int, double *);
+  void unpack_border_vel(int, int, double *);
+  int unpack_border_hybrid(int, int, double *);
+  int pack_exchange(int, double *);
+  int unpack_exchange(double *);
+  int size_restart();
+  int pack_restart(int, double *);
+  int unpack_restart(double *);
+  void create_atom(int, double *);
+  // 2nd arg is the packed image flag, matching the definition
+  void data_atom(double *, imageint, char **);
+  int data_atom_hybrid(int , char **);
+  void pack_data(double **);
+  int pack_data_hybrid(int, double *);
+  void write_data(FILE *, int, double **);
+  int write_data_hybrid(FILE *, double *);
+  bigint memory_usage();
+
+  // variants operating on Kokkos dual-view buffers; those taking an
+  // ExecutionSpace argument run on the space the caller selects
+
+  void grow_reset();
+  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
+                       const int & iswap,
+                       const DAT::tdual_xfloat_2d &buf,
+                       const int &pbc_flag, const int pbc[]);
+  void unpack_comm_kokkos(const int &n, const int &nfirst,
+                          const DAT::tdual_xfloat_2d &buf);
+  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
+                     const int & iswap, const int nfirst,
+                     const int &pbc_flag, const int pbc[]);
+  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,int iswap,
+                         int pbc_flag, int *pbc, ExecutionSpace space);
+  void unpack_border_kokkos(const int &n, const int &nfirst,
+                            const DAT::tdual_xfloat_2d &buf,
+                            ExecutionSpace space);
+  int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
+                           DAT::tdual_int_1d k_sendlist,
+                           DAT::tdual_int_1d k_copylist,
+                           ExecutionSpace space, int dim,
+                           X_FLOAT lo, X_FLOAT hi);
+  int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
+                             int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
+                             ExecutionSpace space);
+
+  // sync / flag-as-modified the arrays selected by a bitmask of *_MASK
+
+  void sync(ExecutionSpace space, unsigned int mask);
+  void modified(ExecutionSpace space, unsigned int mask);
+
+ protected:
+  // raw pointers to the per-atom arrays
+  tagint *tag;
+  int *type,*mask;
+  imageint *image;
+  double **x,**v,**f;
+
+  double *q;
+
+  // d_* / h_* views of the same arrays
+  // NOTE(review): presumably the d_view / h_view halves of the atomKK
+  // DualViews, as assigned in AtomVecFullKokkos::grow_reset() - confirm
+  DAT::t_tagint_1d d_tag;
+  HAT::t_tagint_1d h_tag;
+
+  DAT::t_int_1d d_type, d_mask;
+  HAT::t_int_1d h_type, h_mask;
+
+  DAT::t_imageint_1d d_image;
+  HAT::t_imageint_1d h_image;
+
+  DAT::t_x_array d_x;
+  DAT::t_v_array d_v;
+  DAT::t_f_array d_f;
+  HAT::t_x_array h_x;
+  HAT::t_v_array h_v;
+  HAT::t_f_array h_f;
+
+  DAT::t_float_1d d_q;
+
+  HAT::t_float_1d h_q;
+
+  // counter used by unpack_exchange_kokkos() to accumulate nlocal
+  DAT::tdual_int_1d k_count;
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp
new file mode 100644
index 0000000000..684c81c616
--- /dev/null
+++ b/src/KOKKOS/atom_vec_full_kokkos.cpp
@@ -0,0 +1,2321 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia
Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "stdlib.h" +#include "atom_vec_full_kokkos.h" +#include "atom_kokkos.h" +#include "comm_kokkos.h" +#include "domain.h" +#include "modify.h" +#include "fix.h" +#include "atom_masks.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define DELTA 10000 + +/* ---------------------------------------------------------------------- */ + +AtomVecFullKokkos::AtomVecFullKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) +{ + molecular = 1; + bonds_allow = angles_allow = dihedrals_allow = impropers_allow = 1; + mass_type = 1; + + comm_x_only = comm_f_only = 1; + size_forward = 3; + size_reverse = 3; + size_border = 8; + size_velocity = 3; + size_data_atom = 7; + size_data_vel = 4; + xcol_data = 5; + + atom->molecule_flag = atom->q_flag = 1; + + k_count = DAT::tdual_int_1d("atom::k_count",1); + atomKK = (AtomKokkos *) atom; + commKK = (CommKokkos *) comm; +} + +/* ---------------------------------------------------------------------- + grow atom arrays + n = 0 grows arrays by DELTA + n > 0 allocates arrays to size n +------------------------------------------------------------------------- */ + +void AtomVecFullKokkos::grow(int n) +{ + if (n == 0) nmax += DELTA; + else nmax = n; + atomKK->nmax = nmax; + if (nmax < 0 || nmax > MAXSMALLINT) + error->one(FLERR,"Per-processor system is too big"); + + sync(Device,ALL_MASK); + modified(Device,ALL_MASK); + + memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); + memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); + memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); + 
memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); + + memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); + memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); + memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + + memory->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q"); + memory->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); + + memory->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial"); + memory->grow_kokkos(atomKK->k_special,atomKK->special,nmax,atomKK->maxspecial, + "atom:special"); + memory->grow_kokkos(atomKK->k_num_bond,atomKK->num_bond,nmax,"atom:num_bond"); + memory->grow_kokkos(atomKK->k_bond_type,atomKK->bond_type,nmax,atomKK->bond_per_atom, + "atom:bond_type"); + memory->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom, + "atom:bond_atom"); + + memory->grow_kokkos(atomKK->k_num_angle,atomKK->num_angle,nmax,"atom:num_angle"); + memory->grow_kokkos(atomKK->k_angle_type,atomKK->angle_type,nmax,atomKK->angle_per_atom, + "atom:angle_type"); + memory->grow_kokkos(atomKK->k_angle_atom1,atomKK->angle_atom1,nmax,atomKK->angle_per_atom, + "atom:angle_atom1"); + memory->grow_kokkos(atomKK->k_angle_atom2,atomKK->angle_atom2,nmax,atomKK->angle_per_atom, + "atom:angle_atom2"); + memory->grow_kokkos(atomKK->k_angle_atom3,atomKK->angle_atom3,nmax,atomKK->angle_per_atom, + "atom:angle_atom3"); + + memory->grow_kokkos(atomKK->k_num_dihedral,atomKK->num_dihedral,nmax,"atom:num_dihedral"); + memory->grow_kokkos(atomKK->k_dihedral_type,atomKK->dihedral_type,nmax, + atomKK->dihedral_per_atom,"atom:dihedral_type"); + memory->grow_kokkos(atomKK->k_dihedral_atom1,atomKK->dihedral_atom1,nmax, + atomKK->dihedral_per_atom,"atom:dihedral_atom1"); + memory->grow_kokkos(atomKK->k_dihedral_atom2,atomKK->dihedral_atom2,nmax, + atomKK->dihedral_per_atom,"atom:dihedral_atom2"); + memory->grow_kokkos(atomKK->k_dihedral_atom3,atomKK->dihedral_atom3,nmax, + 
atomKK->dihedral_per_atom,"atom:dihedral_atom3"); + memory->grow_kokkos(atomKK->k_dihedral_atom4,atomKK->dihedral_atom4,nmax, + atomKK->dihedral_per_atom,"atom:dihedral_atom4"); + + memory->grow_kokkos(atomKK->k_num_improper,atomKK->num_improper,nmax,"atom:num_improper"); + memory->grow_kokkos(atomKK->k_improper_type,atomKK->improper_type,nmax, + atomKK->improper_per_atom,"atom:improper_type"); + memory->grow_kokkos(atomKK->k_improper_atom1,atomKK->improper_atom1,nmax, + atomKK->improper_per_atom,"atom:improper_atom1"); + memory->grow_kokkos(atomKK->k_improper_atom2,atomKK->improper_atom2,nmax, + atomKK->improper_per_atom,"atom:improper_atom2"); + memory->grow_kokkos(atomKK->k_improper_atom3,atomKK->improper_atom3,nmax, + atomKK->improper_per_atom,"atom:improper_atom3"); + memory->grow_kokkos(atomKK->k_improper_atom4,atomKK->improper_atom4,nmax, + atomKK->improper_per_atom,"atom:improper_atom4"); + + grow_reset(); + sync(Host,ALL_MASK); + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); +} + +/* ---------------------------------------------------------------------- + reset local array ptrs +------------------------------------------------------------------------- */ + +void AtomVecFullKokkos::grow_reset() +{ + tag = atomKK->tag; + d_tag = atomKK->k_tag.d_view; + h_tag = atomKK->k_tag.h_view; + + type = atomKK->type; + d_type = atomKK->k_type.d_view; + h_type = atomKK->k_type.h_view; + mask = atomKK->mask; + d_mask = atomKK->k_mask.d_view; + h_mask = atomKK->k_mask.h_view; + image = atomKK->image; + d_image = atomKK->k_image.d_view; + h_image = atomKK->k_image.h_view; + + x = atomKK->x; + d_x = atomKK->k_x.d_view; + h_x = atomKK->k_x.h_view; + v = atomKK->v; + d_v = atomKK->k_v.d_view; + h_v = atomKK->k_v.h_view; + f = atomKK->f; + d_f = atomKK->k_f.d_view; + h_f = atomKK->k_f.h_view; + + q = atomKK->q; + d_q = atomKK->k_q.d_view; + h_q = atomKK->k_q.h_view; + + molecule 
= atomKK->molecule; + d_molecule = atomKK->k_molecule.d_view; + h_molecule = atomKK->k_molecule.h_view; + + nspecial = atomKK->nspecial; + d_nspecial = atomKK->k_nspecial.d_view; + h_nspecial = atomKK->k_nspecial.h_view; + special = atomKK->special; + d_special = atomKK->k_special.d_view; + h_special = atomKK->k_special.h_view; + + num_bond = atomKK->num_bond; + d_num_bond = atomKK->k_num_bond.d_view; + h_num_bond = atomKK->k_num_bond.h_view; + bond_type = atomKK->bond_type; + d_bond_type = atomKK->k_bond_type.d_view; + h_bond_type = atomKK->k_bond_type.h_view; + bond_atom = atomKK->bond_atom; + d_bond_atom = atomKK->k_bond_atom.d_view; + h_bond_atom = atomKK->k_bond_atom.h_view; + + num_angle = atomKK->num_angle; + d_num_angle = atomKK->k_num_angle.d_view; + h_num_angle = atomKK->k_num_angle.h_view; + angle_type = atomKK->angle_type; + d_angle_type = atomKK->k_angle_type.d_view; + h_angle_type = atomKK->k_angle_type.h_view; + angle_atom1 = atomKK->angle_atom1; + d_angle_atom1 = atomKK->k_angle_atom1.d_view; + h_angle_atom1 = atomKK->k_angle_atom1.h_view; + angle_atom2 = atomKK->angle_atom2; + d_angle_atom2 = atomKK->k_angle_atom2.d_view; + h_angle_atom2 = atomKK->k_angle_atom2.h_view; + angle_atom3 = atomKK->angle_atom3; + d_angle_atom3 = atomKK->k_angle_atom3.d_view; + h_angle_atom3 = atomKK->k_angle_atom3.h_view; + + num_dihedral = atomKK->num_dihedral; + d_num_dihedral = atomKK->k_num_dihedral.d_view; + h_num_dihedral = atomKK->k_num_dihedral.h_view; + dihedral_type = atomKK->dihedral_type; + d_dihedral_type = atomKK->k_dihedral_type.d_view; + h_dihedral_type = atomKK->k_dihedral_type.h_view; + dihedral_atom1 = atomKK->dihedral_atom1; + d_dihedral_atom1 = atomKK->k_dihedral_atom1.d_view; + h_dihedral_atom1 = atomKK->k_dihedral_atom1.h_view; + dihedral_atom2 = atomKK->dihedral_atom2; + d_dihedral_atom2 = atomKK->k_dihedral_atom2.d_view; + h_dihedral_atom2 = atomKK->k_dihedral_atom2.h_view; + dihedral_atom3 = atomKK->dihedral_atom3; + d_dihedral_atom3 = 
atomKK->k_dihedral_atom3.d_view;
+  h_dihedral_atom3 = atomKK->k_dihedral_atom3.h_view;
+  dihedral_atom4 = atomKK->dihedral_atom4;
+  d_dihedral_atom4 = atomKK->k_dihedral_atom4.d_view;
+  h_dihedral_atom4 = atomKK->k_dihedral_atom4.h_view;
+
+  num_improper = atomKK->num_improper;
+  d_num_improper = atomKK->k_num_improper.d_view;
+  h_num_improper = atomKK->k_num_improper.h_view;
+  improper_type = atomKK->improper_type;
+  d_improper_type = atomKK->k_improper_type.d_view;
+  h_improper_type = atomKK->k_improper_type.h_view;
+  improper_atom1 = atomKK->improper_atom1;
+  d_improper_atom1 = atomKK->k_improper_atom1.d_view;
+  h_improper_atom1 = atomKK->k_improper_atom1.h_view;
+  improper_atom2 = atomKK->improper_atom2;
+  d_improper_atom2 = atomKK->k_improper_atom2.d_view;
+  h_improper_atom2 = atomKK->k_improper_atom2.h_view;
+  improper_atom3 = atomKK->improper_atom3;
+  d_improper_atom3 = atomKK->k_improper_atom3.d_view;
+  h_improper_atom3 = atomKK->k_improper_atom3.h_view;
+  improper_atom4 = atomKK->improper_atom4;
+  d_improper_atom4 = atomKK->k_improper_atom4.d_view;
+  h_improper_atom4 = atomKK->k_improper_atom4.h_view;
+}
+
+/* ----------------------------------------------------------------------
+   duplicate every per-atom host value of atom I into slot J,
+   then let each fix registered in atom->extra_grow copy its own arrays
+------------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::copy(int i, int j, int delflag)
+{
+  // identity, flags, position and velocity
+  h_tag[j] = h_tag[i];
+  h_type[j] = h_type[i];
+  mask[j] = mask[i];
+  h_image[j] = h_image[i];
+  for (int d = 0; d < 3; d++) h_x(j,d) = h_x(i,d);
+  for (int d = 0; d < 3; d++) h_v(j,d) = h_v(i,d);
+
+  // charge and molecule ID
+  h_q[j] = h_q[i];
+  h_molecule(j) = h_molecule(i);
+
+  // bonds: count first, then that many (type, partner) entries
+  h_num_bond(j) = h_num_bond(i);
+  const int nbond = h_num_bond(j);
+  for (int b = 0; b < nbond; b++) {
+    h_bond_type(j,b) = h_bond_type(i,b);
+    h_bond_atom(j,b) = h_bond_atom(i,b);
+  }
+
+  // special neighbors: three counters; the third bounds the list length
+  for (int s = 0; s < 3; s++) h_nspecial(j,s) = h_nspecial(i,s);
+  const int nspec = h_nspecial(j,2);
+  for (int s = 0; s < nspec; s++) h_special(j,s) = h_special(i,s);
+
+  // angles
+  h_num_angle(j) = h_num_angle(i);
+  const int nangle = h_num_angle(j);
+  for (int a = 0; a < nangle; a++) {
+    h_angle_type(j,a) = h_angle_type(i,a);
+    h_angle_atom1(j,a) = h_angle_atom1(i,a);
+    h_angle_atom2(j,a) = h_angle_atom2(i,a);
+    h_angle_atom3(j,a) = h_angle_atom3(i,a);
+  }
+
+  // dihedrals
+  h_num_dihedral(j) = h_num_dihedral(i);
+  const int ndihedral = h_num_dihedral(j);
+  for (int t = 0; t < ndihedral; t++) {
+    h_dihedral_type(j,t) = h_dihedral_type(i,t);
+    h_dihedral_atom1(j,t) = h_dihedral_atom1(i,t);
+    h_dihedral_atom2(j,t) = h_dihedral_atom2(i,t);
+    h_dihedral_atom3(j,t) = h_dihedral_atom3(i,t);
+    h_dihedral_atom4(j,t) = h_dihedral_atom4(i,t);
+  }
+
+  // impropers
+  h_num_improper(j) = h_num_improper(i);
+  const int nimproper = h_num_improper(j);
+  for (int t = 0; t < nimproper; t++) {
+    h_improper_type(j,t) = h_improper_type(i,t);
+    h_improper_atom1(j,t) = h_improper_atom1(i,t);
+    h_improper_atom2(j,t) = h_improper_atom2(i,t);
+    h_improper_atom3(j,t) = h_improper_atom3(i,t);
+    h_improper_atom4(j,t) = h_improper_atom4(i,t);
+  }
+
+  // a zero count simply skips the loop
+  for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+    modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
+}
+
+/* ----------------------------------------------------------------------
+   functor: write the position of atom _list(_iswap,i) into row i of _buf
+   PBC_FLAG != 0 adds the periodic shift built from _pbc and the box
+   TRICLINIC != 0 includes the tilt factors _xy,_xz,_yz in that shift
+------------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecFullKokkos_PackComm {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+
+  AtomVecFullKokkos_PackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz,
+      const int* const pbc):
+    _x(x.view<DeviceType>()),
+    _list(list.view<DeviceType>()),
+    _iswap(iswap),
+    _xprd(xprd),_yprd(yprd),_zprd(zprd),
+    _xy(xy),_xz(xz),_yz(yz)
+  {
+    // view the whole buffer allocation as rows of three values
+    const size_t ncols = 3;
+    const size_t nrows = (buf.view<DeviceType>().dimension_0()
+                          *buf.view<DeviceType>().dimension_1())/3;
+    buffer_view<DeviceType>(_buf,buf,nrows,ncols);
+    for (int k = 0; k < 6; k++) _pbc[k] = pbc[k];
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int src = _list(_iswap,i);
+    if (PBC_FLAG != 0 && TRICLINIC != 0) {
+      _buf(i,0) = _x(src,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+      _buf(i,1) = _x(src,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+      _buf(i,2) = _x(src,2) + _pbc[2]*_zprd;
+    } else if (PBC_FLAG != 0) {
+      _buf(i,0) = _x(src,0) + _pbc[0]*_xprd;
+      _buf(i,1) = _x(src,1) + _pbc[1]*_yprd;
+      _buf(i,2) = _x(src,2) + _pbc[2]*_zprd;
+    } else {
+      _buf(i,0) = _x(src,0);
+      _buf(i,1) = _x(src,1);
+      _buf(i,2) = _x(src,2);
+    }
+  }
+};
+
+/* ----------------------------------------------------------------------
+   pack positions of n listed atoms into buf with a Kokkos kernel
+   runs on the host when commKK->forward_comm_on_host is set, else on
+   the device; returns n*size_forward
+------------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_comm_kokkos(const int &n,
+                                          const DAT::tdual_int_2d &list,
+                                          const int & iswap,
+                                          const DAT::tdual_xfloat_2d &buf,
+                                          const int &pbc_flag,
+                                          const int* const pbc)
+{
+  // kernel selector used below: +2 = apply periodic shift, +1 = triclinic
+
+  if (commKK->forward_comm_on_host) {
+    sync(Host,X_MASK);
+    switch ((pbc_flag ? 2 : 0) + (domain->triclinic ? 1 : 0)) {
+    case 3: {
+      AtomVecFullKokkos_PackComm<LMPHostType,1,1>
+        f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      break;
+    }
+    case 2: {
+      AtomVecFullKokkos_PackComm<LMPHostType,1,0>
+        f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      break;
+    }
+    case 1: {
+      AtomVecFullKokkos_PackComm<LMPHostType,0,1>
+        f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      break;
+    }
+    default: {
+      AtomVecFullKokkos_PackComm<LMPHostType,0,0>
+        f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      break;
+    }
+    }
+    LMPHostType::fence();
+  } else {
+    sync(Device,X_MASK);
+    switch ((pbc_flag ? 2 : 0) + (domain->triclinic ? 1 : 0)) {
+    case 3: {
+      AtomVecFullKokkos_PackComm<LMPDeviceType,1,1>
+        f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      break;
+    }
+    case 2: {
+      AtomVecFullKokkos_PackComm<LMPDeviceType,1,0>
+        f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      break;
+    }
+    case 1: {
+      AtomVecFullKokkos_PackComm<LMPDeviceType,0,1>
+        f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      break;
+    }
+    default: {
+      AtomVecFullKokkos_PackComm<LMPDeviceType,0,0>
+        f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      break;
+    }
+    }
+    LMPDeviceType::fence();
+  }
+
+  return n*size_forward;
+}
+
+/* ----------------------------------------------------------------------
+   functor: copy the position of atom _list(_iswap,i) to atom i+_nfirst
+   of the same position array, with the same optional periodic shift
+   as AtomVecFullKokkos_PackComm
+------------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecFullKokkos_PackCommSelf {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_x_array _xw;
+  int _nfirst;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+
+  AtomVecFullKokkos_PackCommSelf(
+      const typename DAT::tdual_x_array &x,
+      const int &nfirst,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz,
+      const int* const pbc):
+    _x(x.view<DeviceType>()),
+    _xw(x.view<DeviceType>()),
+    _nfirst(nfirst),
+    _list(list.view<DeviceType>()),
+    _iswap(iswap),
+    _xprd(xprd),_yprd(yprd),_zprd(zprd),
+    _xy(xy),_xz(xz),_yz(yz)
+  {
+    for (int k = 0; k < 6; k++) _pbc[k] = pbc[k];
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int src = _list(_iswap,i);
+    const int dst = i+_nfirst;
+    if (PBC_FLAG != 0 && TRICLINIC != 0) {
+      _xw(dst,0) = _x(src,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+      _xw(dst,1) = _x(src,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+      _xw(dst,2) = _x(src,2) + _pbc[2]*_zprd;
+    } else if (PBC_FLAG != 0) {
+      _xw(dst,0) = _x(src,0) + _pbc[0]*_xprd;
+      _xw(dst,1) = _x(src,1) + _pbc[1]*_yprd;
+      _xw(dst,2) = _x(src,2) + _pbc[2]*_zprd;
+    } else {
+      _xw(dst,0) = _x(src,0);
+      _xw(dst,1) = _x(src,1);
+      _xw(dst,2) = _x(src,2);
+    }
+  }
+};
+
+/* ----------------------------------------------------------------------
+   copy positions of n listed atoms to atoms nfirst..nfirst+n-1 without
+   an intermediate buffer; x is marked modified in the space that runs
+   the kernel; returns n*3
+------------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
+                                        const int & iswap,
+                                        const int nfirst, const int &pbc_flag,
+                                        const int* const pbc) {
+  // kernel selector used below: +2 = apply periodic shift, +1 = triclinic
+
+  if (commKK->forward_comm_on_host) {
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);
+    switch ((pbc_flag ? 2 : 0) + (domain->triclinic ? 1 : 0)) {
+    case 3: {
+      AtomVecFullKokkos_PackCommSelf<LMPHostType,1,1>
+        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      break;
+    }
+    case 2: {
+      AtomVecFullKokkos_PackCommSelf<LMPHostType,1,0>
+        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      break;
+    }
+    case 1: {
+      AtomVecFullKokkos_PackCommSelf<LMPHostType,0,1>
+        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      break;
+    }
+    default: {
+      AtomVecFullKokkos_PackCommSelf<LMPHostType,0,0>
+        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      break;
+    }
+    }
+    LMPHostType::fence();
+  } else {
+    sync(Device,X_MASK);
+    modified(Device,X_MASK);
+    switch ((pbc_flag ? 2 : 0) + (domain->triclinic ? 1 : 0)) {
+    case 3: {
+      AtomVecFullKokkos_PackCommSelf<LMPDeviceType,1,1>
+        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      break;
+    }
+    case 2: {
+      AtomVecFullKokkos_PackCommSelf<LMPDeviceType,1,0>
+        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      break;
+    }
+    case 1: {
+      AtomVecFullKokkos_PackCommSelf<LMPDeviceType,0,1>
+        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      break;
+    }
+    default: {
+      AtomVecFullKokkos_PackCommSelf<LMPDeviceType,0,0>
+        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      break;
+    }
+    }
+    LMPDeviceType::fence();
+  }
+  return n*3;
+}
+
+/* ----------------------------------------------------------------------
+   functor: copy row i of _buf into the position of atom i+_first
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecFullKokkos_UnpackComm {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array _x;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
+  int _first;
+
+  AtomVecFullKokkos_UnpackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const int& first):
+    _x(x.view<DeviceType>()),
+    _buf(buf.view<DeviceType>()),
+    _first(first) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int dst = i+_first;
+    _x(dst,0) = _buf(i,0);
+    _x(dst,1) = _buf(i,1);
+    _x(dst,2) = _buf(i,2);
+  }
+};
+
+/* 
---------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::unpack_comm_kokkos(const int &n, const int &first,
+    const DAT::tdual_xfloat_2d &buf ) {
+  // Copy n position triples from buf into x, starting at atom index first.
+  // The kernel runs on the host when commKK->forward_comm_on_host is set,
+  // otherwise on the device; x is synced to and marked modified in that space.
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);
+    struct AtomVecFullKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+    // fence the space the kernel was launched in, like the other host
+    // branches of this file (this branch used to fence LMPDeviceType)
+    LMPHostType::fence();
+  } else {
+    sync(Device,X_MASK);
+    modified(Device,X_MASK);
+    struct AtomVecFullKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+    LMPDeviceType::fence();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   host-side pack of positions of the n atoms in list into buf
+   if pbc_flag is set, each position is shifted by the box lengths
+   (and tilt factors if triclinic) scaled by the pbc flags
+   return number of values written to buf
+------------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_comm(int n, int *list, double *buf,
+                                 int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz;
+
+  m = 0;
+  if (pbc_flag == 0) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+    }
+  } else {
+    // the shift is the same for every atom of this swap
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+      dz = pbc[2]*domain->zprd;
+    }
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0) + dx;
+      buf[m++] = h_x(j,1) + dy;
+      buf[m++] = h_x(j,2) + dz;
+    }
+  }
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   host-side pack of positions and velocities of the n atoms in list
+   positions are shifted as in pack_comm()
+   if deform_vremap is set, velocities of atoms in the deform group
+   (mask & deform_groupbit) are also shifted by the box rate h_rate
+   return number of values written to buf
+------------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_comm_vel(int n, int *list, double *buf,
+                                     int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;
+
+  m = 0;
+  if (pbc_flag == 0) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+      dz = pbc[2]*domain->zprd;
+    }
+    if (!deform_vremap) {
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        // group membership must be tested on the atom being packed (j),
+        // not on the send-list slot i
+        if (mask[j] & deform_groupbit) {
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   host-side unpack of n positions from buf into atoms first..first+n-1
+------------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::unpack_comm(int n, int first, double *buf)
+{
+  int i,m,last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) {
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   host-side unpack of n positions and velocities (6 values per atom)
+   from buf into atoms first..first+n-1
+------------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::unpack_comm_vel(int n, int first, double *buf)
+{
+  int i,m,last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) {
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_v(i,0) = buf[m++];
+    h_v(i,1) = buf[m++];
+    h_v(i,2) = buf[m++];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   host-side pack of the forces of atoms first..first+n-1 into buf
+   forces are synced to the host first when n > 0
+   return number of values written to buf
+------------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_reverse(int n, int first, double *buf)
+{
+  if(n > 0)
+    sync(Host,F_MASK);
+
+  int m = 0;
+  const int last = first + n;
+  for (int i = first; i < last; i++) {
+    buf[m++] = h_f(i,0);
+    buf[m++] = h_f(i,1);
+    buf[m++] = h_f(i,2);
+  }
+  return m;
+}
+
+/* 
---------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::unpack_reverse(int n, int *list, double *buf)
+{
+  // Accumulate n force triples from buf onto the atoms named in list;
+  // forces are marked modified on the host when n > 0.
+  if(n > 0)
+    modified(Host,F_MASK);
+
+  int m = 0;
+  for (int i = 0; i < n; i++) {
+    const int j = list[i];
+    h_f(j,0) += buf[m++];
+    h_f(j,1) += buf[m++];
+    h_f(j,2) += buf[m++];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   functor: pack 8 values per border atom into row i of _buf:
+   x,y,z, tag, type, mask, q, molecule
+   integer fields go through the ubuf union so that their bits are
+   carried in a double-typed buffer slot
+   PBC_FLAG != 0 adds the shift (_dx,_dy,_dz) to the position
+------------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG>
+struct AtomVecFullKokkos_PackBorder {
+  union ubuf {
+    double d;
+    int64_t i;
+    ubuf(double arg) : d(arg) {}
+    ubuf(int64_t arg) : i(arg) {}
+    ubuf(int arg) : i(arg) {}
+  };
+
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  typename AT::t_xfloat_2d _buf;
+  const typename AT::t_int_2d_const _list;
+  const int _iswap;
+  const typename AT::t_x_array_randomread _x;
+  const typename AT::t_tagint_1d _tag;
+  const typename AT::t_int_1d _type;
+  const typename AT::t_int_1d _mask;
+  const typename AT::t_float_1d _q;
+  const typename AT::t_tagint_1d _molecule;
+  X_FLOAT _dx,_dy,_dz;
+
+  AtomVecFullKokkos_PackBorder(
+      const typename AT::t_xfloat_2d &buf,
+      const typename AT::t_int_2d_const &list,
+      const int & iswap,
+      const typename AT::t_x_array &x,
+      const typename AT::t_tagint_1d &tag,
+      const typename AT::t_int_1d &type,
+      const typename AT::t_int_1d &mask,
+      const typename AT::t_float_1d &q,
+      const typename AT::t_tagint_1d &molecule,
+      const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
+    _buf(buf),_list(list),_iswap(iswap),
+    _x(x),_tag(tag),_type(type),_mask(mask),_q(q),_molecule(molecule),
+    _dx(dx),_dy(dy),_dz(dz) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int j = _list(_iswap,i);
+    if (PBC_FLAG == 0) {
+      _buf(i,0) = _x(j,0);
+      _buf(i,1) = _x(j,1);
+      _buf(i,2) = _x(j,2);
+      _buf(i,3) = ubuf(_tag(j)).d;
+      _buf(i,4) = ubuf(_type(j)).d;
+      _buf(i,5) = ubuf(_mask(j)).d;
+      _buf(i,6) = _q(j);
+      _buf(i,7) = ubuf(_molecule(j)).d;
+    } else {
+      _buf(i,0) = _x(j,0) + _dx;
+      _buf(i,1) = _x(j,1) + _dy;
+      _buf(i,2) = _x(j,2) + _dz;
+      _buf(i,3) = ubuf(_tag(j)).d;
+      _buf(i,4) = ubuf(_type(j)).d;
+      _buf(i,5) = ubuf(_mask(j)).d;
+      _buf(i,6) = _q(j);
+      _buf(i,7) = ubuf(_molecule(j)).d;
+    }
+  }
+};
+
+/* ----------------------------------------------------------------------
+   pack border info of n atoms of swap iswap with a Kokkos kernel
+   space selects the host or the device views and kernel
+   return n*size_border
+------------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                                          DAT::tdual_xfloat_2d buf,int iswap,
+                                          int pbc_flag, int *pbc, ExecutionSpace space)
+{
+  X_FLOAT dx,dy,dz;
+
+  if (pbc_flag != 0) {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      // NOTE(review): the triclinic shift is the bare pbc flag, with no
+      // box length applied - presumably positions are in fractional
+      // (lamda) units at this point; confirm against the caller
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    }
+    if(space==Host) {
+      AtomVecFullKokkos_PackBorder<LMPHostType,1> f(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+      LMPHostType::fence();
+    } else {
+      AtomVecFullKokkos_PackBorder<LMPDeviceType,1> f(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+      LMPDeviceType::fence();
+    }
+
+  } else {
+    // the PBC_FLAG == 0 kernels never read the shift
+    dx = dy = dz = 0;
+    if(space==Host) {
+      AtomVecFullKokkos_PackBorder<LMPHostType,0> f(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+      LMPHostType::fence();
+    } else {
+      AtomVecFullKokkos_PackBorder<LMPDeviceType,0> f(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+      LMPDeviceType::fence();
+    }
+  }
+  return n*size_border;
+}
+
+/* ----------------------------------------------------------------------
+   host-side pack of border info (x, tag, type, mask, q, molecule) of
+   the n atoms in list, followed by the data of every fix registered in
+   atom->extra_border
+   return number of values written to buf
+------------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_border(int n, int *list, double *buf,
+                                   int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz;
+
+  m = 0;
+  if (pbc_flag == 0) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
+      buf[m++] = h_q(j);
+      buf[m++] = ubuf(h_molecule(j)).d;
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    }
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0) + dx;
+      buf[m++] = h_x(j,1) + dy;
+      buf[m++] = h_x(j,2) + dz;
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
+      buf[m++] = h_q(j);
+      buf[m++] = ubuf(h_molecule(j)).d;
+    }
+  }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   host-side pack of border info plus velocities of the n atoms in list
+   if deform_vremap is set, velocities of atoms in the deform group
+   (mask & deform_groupbit) are shifted by the box rate h_rate
+   return number of values written to buf
+------------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_border_vel(int n, int *list, double *buf,
+                                       int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;
+
+  m = 0;
+  if (pbc_flag == 0) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
+      buf[m++] = h_q(j);
+      buf[m++] = ubuf(h_molecule(j)).d;
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    }
+    if (!deform_vremap) {
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = ubuf(h_tag(j)).d;
+        buf[m++] = ubuf(h_type(j)).d;
+        buf[m++] = ubuf(h_mask(j)).d;
+        buf[m++] = h_q(j);
+        buf[m++] = ubuf(h_molecule(j)).d;
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = ubuf(h_tag(j)).d;
+        buf[m++] = ubuf(h_type(j)).d;
+        buf[m++] = ubuf(h_mask(j)).d;
+        buf[m++] = h_q(j);
+        buf[m++] = ubuf(h_molecule(j)).d;
+        // group membership must be tested on the atom being packed (j),
+        // not on the send-list slot i
+        if (mask[j] & deform_groupbit) {
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   host-side pack of only the fields this style contributes to a
+   hybrid border message: q and molecule of the n atoms in list
+   return number of values written to buf
+------------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_border_hybrid(int n, int *list, double *buf)
+{
+  int i,j,m;
+
+  m = 0;
+  for (i = 0; i < n; i++) {
+    j = list[i];
+    buf[m++] = h_q(j);
+    buf[m++] = ubuf(h_molecule(j)).d;
+  }
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   functor: unpack row i of _buf (x,y,z, tag, type, mask, q, molecule)
+   into atom i+_first; integer fields are recovered through ubuf
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecFullKokkos_UnpackBorder {
+  union ubuf {
+    double d;
+    int64_t i;
+    ubuf(double arg) : d(arg) {}
+    ubuf(int64_t arg) : i(arg) {}
+    ubuf(int arg) : i(arg) {}
+  };
+
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  const typename AT::t_xfloat_2d_const _buf;
+  typename AT::t_x_array _x;
+  typename AT::t_tagint_1d _tag;
+  typename AT::t_int_1d _type;
+  typename AT::t_int_1d _mask;
+  typename AT::t_float_1d _q;
+  typename AT::t_tagint_1d _molecule;
+  int _first;
+
+
+  AtomVecFullKokkos_UnpackBorder(
+      const typename AT::t_xfloat_2d_const &buf,
+      typename AT::t_x_array &x,
+      typename AT::t_tagint_1d &tag,
+      typename AT::t_int_1d &type,
+      typename AT::t_int_1d &mask,
+      typename AT::t_float_1d &q,
+      typename AT::t_tagint_1d &molecule,
+      const int& first):
+    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_molecule(molecule),
+    _first(first){
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      _x(i+_first,0) = _buf(i,0);
+      _x(i+_first,1) = _buf(i,1);
+      _x(i+_first,2) = _buf(i,2);
+      _tag(i+_first) = (tagint) ubuf(_buf(i,3)).i;
+      _type(i+_first) = (int) ubuf(_buf(i,4)).i;
+      _mask(i+_first) = (int) ubuf(_buf(i,5)).i;
+      _q(i+_first) = _buf(i,6);
+      _molecule(i+_first) = (tagint) ubuf(_buf(i,7)).i;
+
+  }
+};
+
+/* ----------------------------------------------------------------------
+   unpack border info of n atoms starting at index first with a Kokkos
+   kernel in the given space; grows the per-atom arrays first if needed
+------------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::unpack_border_kokkos(const int &n, const int &first,
+                     const DAT::tdual_xfloat_2d &buf,
+                     ExecutionSpace space) {
+  // NOTE(review): modified() is issued both before and after the grow
+  // loop - presumably because grow(0) changes the sync state; confirm
+  // before removing either call
+  modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
+  while (first+n >= nmax) grow(0);
+  modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
+  if(space==Host) {
+    struct AtomVecFullKokkos_UnpackBorder<LMPHostType>
+      f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,h_molecule,first);
+    Kokkos::parallel_for(n,f);
+    LMPHostType::fence();
+  } else {
+    struct AtomVecFullKokkos_UnpackBorder<LMPDeviceType>
+      f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_q,d_molecule,first);
+    Kokkos::parallel_for(n,f);
+    LMPDeviceType::fence();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   host-side unpack of border info of n atoms into first..first+n-1,
+   growing the arrays when index nmax is reached, then hand the rest of
+   buf to every fix registered in atom->extra_border
+------------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::unpack_border(int n, int first, double *buf)
+{
+  int i,m,last;
+
+  m = 0;
+  last = first + n;
+  if(n > 0)
+    modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
+  for (i = first; i < last; i++) {
+    if (i == nmax) grow(0);
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_tag(i) = (tagint) ubuf(buf[m++]).i;
+    h_type(i) = (int) ubuf(buf[m++]).i;
+    h_mask(i) = (int) ubuf(buf[m++]).i;
+    h_q(i) = buf[m++];
+    h_molecule(i) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->
+        unpack_border(n,first,&buf[m]);
+}
+
+/* ----------------------------------------------------------------------
+   host-side unpack of border info plus velocities of n atoms into
+   first..first+n-1, then hand the rest of buf to every fix registered
+   in atom->extra_border
+------------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::unpack_border_vel(int n, int first, double *buf)
+{
+  int i,m,last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) {
+    if (i == nmax) grow(0);
+    // flag every field written below, velocities included (V_MASK was
+    // missing); kept inside the loop so the flag is set again after a
+    // possible grow(0)
+    modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_tag(i) = (tagint) ubuf(buf[m++]).i;
+    h_type(i) = (int) ubuf(buf[m++]).i;
+    h_mask(i) = (int) ubuf(buf[m++]).i;
+    h_q(i) = buf[m++];
+    h_molecule(i) = (tagint) ubuf(buf[m++]).i;
+    h_v(i,0) = buf[m++];
+    h_v(i,1) = buf[m++];
+    h_v(i,2) = buf[m++];
+  }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->
+        unpack_border(n,first,&buf[m]);
+}
+
+/* ----------------------------------------------------------------------
+   host-side unpack of only the fields this style contributes to a
+   hybrid border message: q and molecule of atoms first..first+n-1
+   return number of values read from buf
+------------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::unpack_border_hybrid(int n, int first, double *buf)
+{
+  int i,m,last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) {
+    h_q(i) = buf[m++];
+    h_molecule(i) = (tagint) ubuf(buf[m++]).i;
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecFullKokkos_PackExchangeFunctor {
+
+  union ubuf {
+    double d;
+    int64_t i;
+    ubuf(double arg) : d(arg) {}
+    ubuf(int64_t arg) : i(arg) {}
+    ubuf(int arg) : i(arg) {}
+  };
+
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typename AT::t_x_array_randomread _x;
+  typename AT::t_v_array_randomread _v;
+  typename AT::t_tagint_1d_randomread _tag;
+  typename AT::t_int_1d_randomread _type;
+  typename AT::t_int_1d_randomread _mask;
+  typename AT::t_imageint_1d_randomread 
_image; + typename AT::t_float_1d_randomread _q; + typename AT::t_tagint_1d_randomread _molecule; + typename AT::t_int_2d_randomread _nspecial; + typename AT::t_tagint_2d_randomread _special; + typename AT::t_int_1d_randomread _num_bond; + typename AT::t_int_2d_randomread _bond_type; + typename AT::t_tagint_2d_randomread _bond_atom; + typename AT::t_int_1d_randomread _num_angle; + typename AT::t_int_2d_randomread _angle_type; + typename AT::t_tagint_2d_randomread _angle_atom1,_angle_atom2,_angle_atom3; + typename AT::t_int_1d_randomread _num_dihedral; + typename AT::t_int_2d_randomread _dihedral_type; + typename AT::t_tagint_2d_randomread _dihedral_atom1,_dihedral_atom2, + _dihedral_atom3,_dihedral_atom4; + typename AT::t_int_1d_randomread _num_improper; + typename AT::t_int_2d_randomread _improper_type; + typename AT::t_tagint_2d_randomread _improper_atom1,_improper_atom2, + _improper_atom3,_improper_atom4; + typename AT::t_x_array _xw; + typename AT::t_v_array _vw; + typename AT::t_tagint_1d _tagw; + typename AT::t_int_1d _typew; + typename AT::t_int_1d _maskw; + typename AT::t_imageint_1d _imagew; + typename AT::t_float_1d _qw; + typename AT::t_tagint_1d _moleculew; + typename AT::t_int_2d _nspecialw; + typename AT::t_tagint_2d _specialw; + typename AT::t_int_1d _num_bondw; + typename AT::t_int_2d _bond_typew; + typename AT::t_tagint_2d _bond_atomw; + typename AT::t_int_1d _num_anglew; + typename AT::t_int_2d _angle_typew; + typename AT::t_tagint_2d _angle_atom1w,_angle_atom2w,_angle_atom3w; + typename AT::t_int_1d _num_dihedralw; + typename AT::t_int_2d _dihedral_typew; + typename AT::t_tagint_2d _dihedral_atom1w,_dihedral_atom2w, + _dihedral_atom3w,_dihedral_atom4w; + typename AT::t_int_1d _num_improperw; + typename AT::t_int_2d _improper_typew; + typename AT::t_tagint_2d _improper_atom1w,_improper_atom2w, + _improper_atom3w,_improper_atom4w; + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d_const _sendlist; + typename AT::t_int_1d_const _copylist; + 
int _nlocal,_dim; + X_FLOAT _lo,_hi; + size_t elements; + + AtomVecFullKokkos_PackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d sendlist, + typename AT::tdual_int_1d copylist,int nlocal, int dim, + X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view<DeviceType>()), + _v(atom->k_v.view<DeviceType>()), + _tag(atom->k_tag.view<DeviceType>()), + _type(atom->k_type.view<DeviceType>()), + _mask(atom->k_mask.view<DeviceType>()), + _image(atom->k_image.view<DeviceType>()), + _q(atom->k_q.view<DeviceType>()), + _molecule(atom->k_molecule.view<DeviceType>()), + _nspecial(atom->k_nspecial.view<DeviceType>()), + _special(atom->k_special.view<DeviceType>()), + _num_bond(atom->k_num_bond.view<DeviceType>()), + _bond_type(atom->k_bond_type.view<DeviceType>()), + _bond_atom(atom->k_bond_atom.view<DeviceType>()), + _num_angle(atom->k_num_angle.view<DeviceType>()), + _angle_type(atom->k_angle_type.view<DeviceType>()), + _angle_atom1(atom->k_angle_atom1.view<DeviceType>()), + _angle_atom2(atom->k_angle_atom2.view<DeviceType>()), + _angle_atom3(atom->k_angle_atom3.view<DeviceType>()), + _num_dihedral(atom->k_num_dihedral.view<DeviceType>()), + _dihedral_type(atom->k_dihedral_type.view<DeviceType>()), + _dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()), + _dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()), + _dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()), + _dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()), + _num_improper(atom->k_num_improper.view<DeviceType>()), + _improper_type(atom->k_improper_type.view<DeviceType>()), + _improper_atom1(atom->k_improper_atom1.view<DeviceType>()), + _improper_atom2(atom->k_improper_atom2.view<DeviceType>()), + _improper_atom3(atom->k_improper_atom3.view<DeviceType>()), + _improper_atom4(atom->k_improper_atom4.view<DeviceType>()), + _xw(atom->k_x.view<DeviceType>()), + _vw(atom->k_v.view<DeviceType>()), + _tagw(atom->k_tag.view<DeviceType>()), + 
_typew(atom->k_type.view<DeviceType>()), + _maskw(atom->k_mask.view<DeviceType>()), + _imagew(atom->k_image.view<DeviceType>()), + _qw(atom->k_q.view<DeviceType>()), + _moleculew(atom->k_molecule.view<DeviceType>()), + _nspecialw(atom->k_nspecial.view<DeviceType>()), + _specialw(atom->k_special.view<DeviceType>()), + _num_bondw(atom->k_num_bond.view<DeviceType>()), + _bond_typew(atom->k_bond_type.view<DeviceType>()), + _bond_atomw(atom->k_bond_atom.view<DeviceType>()), + _num_anglew(atom->k_num_angle.view<DeviceType>()), + _angle_typew(atom->k_angle_type.view<DeviceType>()), + _angle_atom1w(atom->k_angle_atom1.view<DeviceType>()), + _angle_atom2w(atom->k_angle_atom2.view<DeviceType>()), + _angle_atom3w(atom->k_angle_atom3.view<DeviceType>()), + _num_dihedralw(atom->k_num_dihedral.view<DeviceType>()), + _dihedral_typew(atom->k_dihedral_type.view<DeviceType>()), + _dihedral_atom1w(atom->k_dihedral_atom1.view<DeviceType>()), + _dihedral_atom2w(atom->k_dihedral_atom2.view<DeviceType>()), + _dihedral_atom3w(atom->k_dihedral_atom3.view<DeviceType>()), + _dihedral_atom4w(atom->k_dihedral_atom4.view<DeviceType>()), + _num_improperw(atom->k_num_improper.view<DeviceType>()), + _improper_typew(atom->k_improper_type.view<DeviceType>()), + _improper_atom1w(atom->k_improper_atom1.view<DeviceType>()), + _improper_atom2w(atom->k_improper_atom2.view<DeviceType>()), + _improper_atom3w(atom->k_improper_atom3.view<DeviceType>()), + _improper_atom4w(atom->k_improper_atom4.view<DeviceType>()), + _sendlist(sendlist.template view<DeviceType>()), + _copylist(copylist.template view<DeviceType>()), + _nlocal(nlocal),_dim(dim), + _lo(lo),_hi(hi){ + // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, + // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, + // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, + // and angle_atom3 + // 1 num_dihedral, dihedral_per_atom dihedral_type, 
4*dihedral_per_atom + // 1 num_improper, 5*improper_per_atom + // 1 charge + // 1 to store buffer length + elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ + 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; + const int maxsendlist = (buf.template view<DeviceType>().dimension_0()* + buf.template view<DeviceType>().dimension_1())/elements; + buffer_view<DeviceType>(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &mysend) const { + int k; + const int i = _sendlist(mysend); + _buf(mysend,0) = elements; + int m = 1; + _buf(mysend,m++) = _x(i,0); + _buf(mysend,m++) = _x(i,1); + _buf(mysend,m++) = _x(i,2); + _buf(mysend,m++) = _v(i,0); + _buf(mysend,m++) = _v(i,1); + _buf(mysend,m++) = _v(i,2); + _buf(mysend,m++) = ubuf(_tag(i)).d; + _buf(mysend,m++) = ubuf(_type(i)).d; + _buf(mysend,m++) = ubuf(_mask(i)).d; + _buf(mysend,m++) = ubuf(_image(i)).d; + _buf(mysend,m++) = _q(i); + _buf(mysend,m++) = ubuf(_molecule(i)).d; + _buf(mysend,m++) = ubuf(_num_bond(i)).d; + for (k = 0; k < _num_bond(i); k++) { + _buf(mysend,m++) = ubuf(_bond_type(i,k)).d; + _buf(mysend,m++) = ubuf(_bond_atom(i,k)).d; + } + _buf(mysend,m++) = ubuf(_num_angle(i)).d; + for (k = 0; k < _num_angle(i); k++) { + _buf(mysend,m++) = ubuf(_angle_type(i,k)).d; + _buf(mysend,m++) = ubuf(_angle_atom1(i,k)).d; + _buf(mysend,m++) = ubuf(_angle_atom2(i,k)).d; + _buf(mysend,m++) = ubuf(_angle_atom3(i,k)).d; + } + _buf(mysend,m++) = ubuf(_num_dihedral(i)).d; + for (k = 0; k < _num_dihedral(i); k++) { + _buf(mysend,m++) = ubuf(_dihedral_type(i,k)).d; + _buf(mysend,m++) = ubuf(_dihedral_atom1(i,k)).d; + _buf(mysend,m++) = ubuf(_dihedral_atom2(i,k)).d; + _buf(mysend,m++) = ubuf(_dihedral_atom3(i,k)).d; + _buf(mysend,m++) = ubuf(_dihedral_atom4(i,k)).d; + } + _buf(mysend,m++) = ubuf(_num_improper(i)).d; + for (k = 0; k < _num_improper(i); k++) { + _buf(mysend,m++) = ubuf(_improper_type(i,k)).d; + _buf(mysend,m++) = ubuf(_improper_atom1(i,k)).d; + 
_buf(mysend,m++) = ubuf(_improper_atom2(i,k)).d; + _buf(mysend,m++) = ubuf(_improper_atom3(i,k)).d; + _buf(mysend,m++) = ubuf(_improper_atom4(i,k)).d; + } + + _buf(mysend,m++) = ubuf(_nspecial(i,0)).d; + _buf(mysend,m++) = ubuf(_nspecial(i,1)).d; + _buf(mysend,m++) = ubuf(_nspecial(i,2)).d; + for (k = 0; k < _nspecial(i,2); k++) + _buf(mysend,m++) = ubuf(_special(i,k)).d; + + const int j = _copylist(mysend); + + if(j>-1) { + _xw(i,0) = _x(j,0); + _xw(i,1) = _x(j,1); + _xw(i,2) = _x(j,2); + _vw(i,0) = _v(j,0); + _vw(i,1) = _v(j,1); + _vw(i,2) = _v(j,2); + _tagw(i) = _tag(j); + _typew(i) = _type(j); + _maskw(i) = _mask(j); + _imagew(i) = _image(j); + _qw(i) = _q(j); + _moleculew(i) = _molecule(j); + _num_bondw(i) = _num_bond(j); + for (k = 0; k < _num_bond(j); k++) { + _bond_typew(i,k) = _bond_type(j,k); + _bond_atomw(i,k) = _bond_atom(j,k); + } + _num_anglew(i) = _num_angle(j); + for (k = 0; k < _num_angle(j); k++) { + _angle_typew(i,k) = _angle_type(j,k); + _angle_atom1w(i,k) = _angle_atom1(j,k); + _angle_atom2w(i,k) = _angle_atom2(j,k); + _angle_atom3w(i,k) = _angle_atom3(j,k); + } + _num_dihedralw(i) = _num_dihedral(j); + for (k = 0; k < _num_dihedral(j); k++) { + _dihedral_typew(i,k) = _dihedral_type(j,k); + _dihedral_atom1w(i,k) = _dihedral_atom1(j,k); + _dihedral_atom2w(i,k) = _dihedral_atom2(j,k); + _dihedral_atom3w(i,k) = _dihedral_atom3(j,k); + _dihedral_atom4w(i,k) = _dihedral_atom4(j,k); + } + _num_improperw(i) = _num_improper(j); + for (k = 0; k < _num_improper(j); k++) { + _improper_typew(i,k) = _improper_type(j,k); + _improper_atom1w(i,k) = _improper_atom1(j,k); + _improper_atom2w(i,k) = _improper_atom2(j,k); + _improper_atom3w(i,k) = _improper_atom3(j,k); + _improper_atom4w(i,k) = _improper_atom4(j,k); + } + _nspecialw(i,0) = _nspecial(j,0); + _nspecialw(i,1) = _nspecial(j,1); + _nspecialw(i,2) = _nspecial(j,2); + for (k = 0; k < _nspecial(j,2); k++) + _specialw(i,k) = _special(j,k); + } + } +}; + +/* 
---------------------------------------------------------------------- */ + +int AtomVecFullKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space,int dim,X_FLOAT lo, + X_FLOAT hi ) +{ + const int elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ + 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; + if(nsend > (int) (k_buf.view<LMPHostType>().dimension_0()* + k_buf.view<LMPHostType>().dimension_1())/elements) { + int newsize = nsend*elements/k_buf.view<LMPHostType>().dimension_1()+1; + k_buf.resize(newsize,k_buf.view<LMPHostType>().dimension_1()); + } + if(space == Host) { + AtomVecFullKokkos_PackExchangeFunctor<LMPHostType> + f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + LMPHostType::fence(); + return nsend*elements; + } else { + AtomVecFullKokkos_PackExchangeFunctor<LMPDeviceType> + f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + LMPDeviceType::fence(); + return nsend*elements; + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecFullKokkos::pack_exchange(int i, double *buf) +{ + int k; + int m = 1; + buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + buf[m++] = ubuf(h_tag(i)).d; + buf[m++] = ubuf(h_type(i)).d; + buf[m++] = ubuf(h_mask(i)).d; + buf[m++] = ubuf(h_image(i)).d; + buf[m++] = h_q(i); + buf[m++] = ubuf(h_molecule(i)).d; + buf[m++] = ubuf(h_num_bond(i)).d; + for (k = 0; k < h_num_bond(i); k++) { + buf[m++] = ubuf(h_bond_type(i,k)).d; + buf[m++] = ubuf(h_bond_atom(i,k)).d; + } + buf[m++] = ubuf(h_num_angle(i)).d; + for (k = 0; k < h_num_angle(i); k++) { + buf[m++] = ubuf(h_angle_type(i,k)).d; + buf[m++] = ubuf(h_angle_atom1(i,k)).d; + buf[m++] = ubuf(h_angle_atom2(i,k)).d; + buf[m++] = 
ubuf(h_angle_atom3(i,k)).d; + } + buf[m++] = ubuf(h_num_dihedral(i)).d; + for (k = 0; k < h_num_dihedral(i); k++) { + buf[m++] = ubuf(h_dihedral_type(i,k)).d; + buf[m++] = ubuf(h_dihedral_atom1(i,k)).d; + buf[m++] = ubuf(h_dihedral_atom2(i,k)).d; + buf[m++] = ubuf(h_dihedral_atom3(i,k)).d; + buf[m++] = ubuf(h_dihedral_atom4(i,k)).d; + } + buf[m++] = ubuf(h_num_improper(i)).d; + for (k = 0; k < h_num_improper(i); k++) { + buf[m++] = ubuf(h_improper_type(i,k)).d; + buf[m++] = ubuf(h_improper_atom1(i,k)).d; + buf[m++] = ubuf(h_improper_atom2(i,k)).d; + buf[m++] = ubuf(h_improper_atom3(i,k)).d; + buf[m++] = ubuf(h_improper_atom4(i,k)).d; + } + buf[m++] = ubuf(h_nspecial(i,0)).d; + buf[m++] = ubuf(h_nspecial(i,1)).d; + buf[m++] = ubuf(h_nspecial(i,2)).d; + for (k = 0; k < h_nspecial(i,2); k++) + buf[m++] = ubuf(h_special(i,k)).d; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecFullKokkos_UnpackExchangeFunctor { + + union ubuf { + double d; + int64_t i; + ubuf(double arg) : d(arg) {} + ubuf(int64_t arg) : i(arg) {} + ubuf(int arg) : i(arg) {} + }; + + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + typename AT::t_x_array _x; + typename AT::t_v_array _v; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_imageint_1d _image; + typename AT::t_float_1d _q; + typename AT::t_tagint_1d _molecule; + typename AT::t_int_2d _nspecial; + typename AT::t_tagint_2d _special; + typename AT::t_int_1d _num_bond; + typename AT::t_int_2d _bond_type; + typename AT::t_tagint_2d _bond_atom; + typename AT::t_int_1d _num_angle; + typename AT::t_int_2d _angle_type; + typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3; + typename 
AT::t_int_1d _num_dihedral; + typename AT::t_int_2d _dihedral_type; + typename AT::t_tagint_2d _dihedral_atom1,_dihedral_atom2, + _dihedral_atom3,_dihedral_atom4; + typename AT::t_int_1d _num_improper; + typename AT::t_int_2d _improper_type; + typename AT::t_tagint_2d _improper_atom1,_improper_atom2, + _improper_atom3,_improper_atom4; + + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d _nlocal; + int _dim; + X_FLOAT _lo,_hi; + size_t elements; + + AtomVecFullKokkos_UnpackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + int dim, X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view<DeviceType>()), + _v(atom->k_v.view<DeviceType>()), + _tag(atom->k_tag.view<DeviceType>()), + _type(atom->k_type.view<DeviceType>()), + _mask(atom->k_mask.view<DeviceType>()), + _image(atom->k_image.view<DeviceType>()), + _q(atom->k_q.view<DeviceType>()), + _molecule(atom->k_molecule.view<DeviceType>()), + _nspecial(atom->k_nspecial.view<DeviceType>()), + _special(atom->k_special.view<DeviceType>()), + _num_bond(atom->k_num_bond.view<DeviceType>()), + _bond_type(atom->k_bond_type.view<DeviceType>()), + _bond_atom(atom->k_bond_atom.view<DeviceType>()), + _num_angle(atom->k_num_angle.view<DeviceType>()), + _angle_type(atom->k_angle_type.view<DeviceType>()), + _angle_atom1(atom->k_angle_atom1.view<DeviceType>()), + _angle_atom2(atom->k_angle_atom2.view<DeviceType>()), + _angle_atom3(atom->k_angle_atom3.view<DeviceType>()), + _num_dihedral(atom->k_num_dihedral.view<DeviceType>()), + _dihedral_type(atom->k_dihedral_type.view<DeviceType>()), + _dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()), + _dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()), + _dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()), + _dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()), + _num_improper(atom->k_num_improper.view<DeviceType>()), + _improper_type(atom->k_improper_type.view<DeviceType>()), + 
_improper_atom1(atom->k_improper_atom1.view<DeviceType>()), + _improper_atom2(atom->k_improper_atom2.view<DeviceType>()), + _improper_atom3(atom->k_improper_atom3.view<DeviceType>()), + _improper_atom4(atom->k_improper_atom4.view<DeviceType>()), + _nlocal(nlocal.template view<DeviceType>()),_dim(dim), + _lo(lo),_hi(hi){ + + elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ + 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; + const int maxsendlist = (buf.template view<DeviceType>().dimension_0()* + buf.template view<DeviceType>().dimension_1())/elements; + buffer_view<DeviceType>(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &myrecv) const { + X_FLOAT x = _buf(myrecv,_dim+1); + if (x >= _lo && x < _hi) { + int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + int m = 1; + _x(i,0) = _buf(myrecv,m++); + _x(i,1) = _buf(myrecv,m++); + _x(i,2) = _buf(myrecv,m++); + _v(i,0) = _buf(myrecv,m++); + _v(i,1) = _buf(myrecv,m++); + _v(i,2) = _buf(myrecv,m++); + _tag(i) = (tagint) ubuf(_buf(myrecv,m++)).i; + _type(i) = (int) ubuf(_buf(myrecv,m++)).i; + _mask(i) = (int) ubuf(_buf(myrecv,m++)).i; + _image(i) = (imageint) ubuf(_buf(myrecv,m++)).i; + _q(i) = _buf(myrecv,m++); + _molecule(i) = (tagint) ubuf(_buf(myrecv,m++)).i; + _num_bond(i) = (int) ubuf(_buf(myrecv,m++)).i; + int k; + for (k = 0; k < _num_bond(i); k++) { + _bond_type(i,k) = (int) ubuf(_buf(myrecv,m++)).i; + _bond_atom(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + } + _num_angle(i) = (int) ubuf(_buf(myrecv,m++)).i; + for (k = 0; k < _num_angle(i); k++) { + _angle_type(i,k) = (int) ubuf(_buf(myrecv,m++)).i; + _angle_atom1(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + _angle_atom2(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + _angle_atom3(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + } + _num_dihedral(i) = (int) ubuf(_buf(myrecv,m++)).i; + for (k = 0; k < _num_dihedral(i); k++) { + _dihedral_type(i,k) = (int) ubuf(_buf(myrecv,m++)).i; + 
_dihedral_atom1(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + _dihedral_atom2(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + _dihedral_atom3(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + _dihedral_atom4(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + } + _num_improper(i) = (int) ubuf(_buf(myrecv,m++)).i; + for (k = 0; k < _num_improper(i); k++) { + _improper_type(i,k) = (int) ubuf(_buf(myrecv,m++)).i; + _improper_atom1(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + _improper_atom2(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + _improper_atom3(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + _improper_atom4(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + } + _nspecial(i,0) = (int) ubuf(_buf(myrecv,m++)).i; + _nspecial(i,1) = (int) ubuf(_buf(myrecv,m++)).i; + _nspecial(i,2) = (int) ubuf(_buf(myrecv,m++)).i; + for (k = 0; k < _nspecial(i,2); k++) + _special(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i; + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv, + int nlocal,int dim,X_FLOAT lo,X_FLOAT hi, + ExecutionSpace space) { + const size_t elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ + 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; + if(space == Host) { + k_count.h_view(0) = nlocal; + AtomVecFullKokkos_UnpackExchangeFunctor<LMPHostType> + f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/elements,f); + LMPHostType::fence(); + return k_count.h_view(0); + } else { + k_count.h_view(0) = nlocal; + k_count.modify<LMPHostType>(); + k_count.sync<LMPDeviceType>(); + AtomVecFullKokkos_UnpackExchangeFunctor<LMPDeviceType> + f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/elements,f); + LMPDeviceType::fence(); + k_count.modify<LMPDeviceType>(); + k_count.sync<LMPHostType>(); + + return k_count.h_view(0); + } +} + +/* ---------------------------------------------------------------------- */ + +int 
AtomVecFullKokkos::unpack_exchange(double *buf) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK | + ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK); + + int k; + int m = 1; + h_x(nlocal,0) = buf[m++]; + h_x(nlocal,1) = buf[m++]; + h_x(nlocal,2) = buf[m++]; + h_v(nlocal,0) = buf[m++]; + h_v(nlocal,1) = buf[m++]; + h_v(nlocal,2) = buf[m++]; + h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; + h_type(nlocal) = (int) ubuf(buf[m++]).i; + h_mask(nlocal) = (int) ubuf(buf[m++]).i; + h_image(nlocal) = (imageint) ubuf(buf[m++]).i; + h_q(nlocal) = buf[m++]; + h_molecule(nlocal) = (tagint) ubuf(buf[m++]).i; + + h_num_bond(nlocal) = (int) ubuf(buf[m++]).i; + for (k = 0; k < h_num_bond(nlocal); k++) { + h_bond_type(nlocal,k) = (int) ubuf(buf[m++]).i; + h_bond_atom(nlocal,k) = (tagint) ubuf(buf[m++]).i; + } + h_num_angle(nlocal) = (int) ubuf(buf[m++]).i; + for (k = 0; k < h_num_angle(nlocal); k++) { + h_angle_type(nlocal,k) = (int) ubuf(buf[m++]).i; + h_angle_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_angle_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_angle_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i; + } + h_num_dihedral(nlocal) = (int) ubuf(buf[m++]).i; + for (k = 0; k < h_num_dihedral(nlocal); k++) { + h_dihedral_type(nlocal,k) = (int) ubuf(buf[m++]).i; + h_dihedral_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_dihedral_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_dihedral_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_dihedral_atom4(nlocal,k) = (tagint) ubuf(buf[m++]).i; + } + h_num_improper(nlocal) = (int) ubuf(buf[m++]).i; + for (k = 0; k < h_num_improper(nlocal); k++) { + h_improper_type(nlocal,k) = (int) ubuf(buf[m++]).i; + h_improper_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_improper_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_improper_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i; + 
h_improper_atom4(nlocal,k) = (tagint) ubuf(buf[m++]).i; + } + h_nspecial(nlocal,0) = (int) ubuf(buf[m++]).i; + h_nspecial(nlocal,1) = (int) ubuf(buf[m++]).i; + h_nspecial(nlocal,2) = (int) ubuf(buf[m++]).i; + for (k = 0; k < h_nspecial(nlocal,2); k++) + h_special(nlocal,k) = (tagint) ubuf(buf[m++]).i; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]-> + unpack_exchange(nlocal,&buf[m]); + + atom->nlocal++; + return m; +} + +/* ---------------------------------------------------------------------- + size of restart data for all atoms owned by this proc + include extra data stored by fixes +------------------------------------------------------------------------- */ + +int AtomVecFullKokkos::size_restart() +{ + int i; + + int nlocal = atom->nlocal; + int n = 0; + for (i = 0; i < nlocal; i++) + n += 17 + 2*num_bond[i] + 4*num_angle[i] + + 5*num_dihedral[i] + 5*num_improper[i]; + + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + for (i = 0; i < nlocal; i++) + n += modify->fix[atom->extra_restart[iextra]]->size_restart(i); + + return n; +} + +/* ---------------------------------------------------------------------- + pack atom I's data for restart file including extra quantities + xyz must be 1st 3 values, so that read_restart can test on them + molecular types may be negative, but write as positive +------------------------------------------------------------------------- */ + +int AtomVecFullKokkos::pack_restart(int i, double *buf) +{ + int m = 1; + buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] = ubuf(h_tag(i)).d; + buf[m++] = ubuf(h_type(i)).d; + buf[m++] = ubuf(h_mask(i)).d; + buf[m++] = ubuf(h_image(i)).d; + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + + buf[m++] = h_q(i); + buf[m++] = ubuf(h_molecule(i)).d; + + buf[m++] = ubuf(h_num_bond(i)).d; + for (int k = 0; k < h_num_bond(i); 
k++) { + buf[m++] = ubuf(MAX(h_bond_type(i,k),-h_bond_type(i,k))).d; + buf[m++] = ubuf(h_bond_atom(i,k)).d; + } + + buf[m++] = ubuf(h_num_angle(i)).d; + for (int k = 0; k < h_num_angle(i); k++) { + buf[m++] = ubuf(MAX(h_angle_type(i,k),-h_angle_type(i,k))).d; + buf[m++] = ubuf(h_angle_atom1(i,k)).d; + buf[m++] = ubuf(h_angle_atom2(i,k)).d; + buf[m++] = ubuf(h_angle_atom3(i,k)).d; + } + + buf[m++] = ubuf(h_num_dihedral(i)).d; + for (int k = 0; k < h_num_dihedral(i); k++) { + buf[m++] = ubuf(MAX(h_dihedral_type(i,k),-h_dihedral_type(i,k))).d; + buf[m++] = ubuf(h_dihedral_atom1(i,k)).d; + buf[m++] = ubuf(h_dihedral_atom2(i,k)).d; + buf[m++] = ubuf(h_dihedral_atom3(i,k)).d; + buf[m++] = ubuf(h_dihedral_atom4(i,k)).d; + } + + buf[m++] = ubuf(h_num_improper(i)).d; + for (int k = 0; k < h_num_improper(i); k++) { + buf[m++] = ubuf(MAX(h_improper_type(i,k),-h_improper_type(i,k))).d; + buf[m++] = ubuf(h_improper_atom1(i,k)).d; + buf[m++] = ubuf(h_improper_atom2(i,k)).d; + buf[m++] = ubuf(h_improper_atom3(i,k)).d; + buf[m++] = ubuf(h_improper_atom4(i,k)).d; + } + + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- + unpack data for one atom from restart file including extra quantities +------------------------------------------------------------------------- */ + +int AtomVecFullKokkos::unpack_restart(double *buf) +{ + int k; + + int nlocal = atom->nlocal; + if (nlocal == nmax) { + grow(0); + if (atom->nextra_store) + memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); + } + + int m = 1; + h_x(nlocal,0) = buf[m++]; + h_x(nlocal,1) = buf[m++]; + h_x(nlocal,2) = buf[m++]; + h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; + h_type(nlocal) = (int) ubuf(buf[m++]).i; + h_mask(nlocal) = (int) ubuf(buf[m++]).i; + h_image(nlocal) = (imageint) ubuf(buf[m++]).i; 
+ h_v(nlocal,0) = buf[m++]; + h_v(nlocal,1) = buf[m++]; + h_v(nlocal,2) = buf[m++]; + + h_q(nlocal) = buf[m++]; + h_molecule(nlocal) = (tagint) ubuf(buf[m++]).i; + + h_num_bond(nlocal) = (int) ubuf(buf[m++]).i; + for (k = 0; k < h_num_bond(nlocal); k++) { + h_bond_type(nlocal,k) = (int) ubuf(buf[m++]).i; + h_bond_atom(nlocal,k) = (tagint) ubuf(buf[m++]).i; + } + + h_num_angle(nlocal) = (int) ubuf(buf[m++]).i; + for (k = 0; k < h_num_angle(nlocal); k++) { + h_angle_type(nlocal,k) = (int) ubuf(buf[m++]).i; + h_angle_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_angle_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_angle_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i; + } + + h_num_dihedral(nlocal) = (int) ubuf(buf[m++]).i; + for (k = 0; k < h_num_dihedral(nlocal); k++) { + h_dihedral_type(nlocal,k) = (int) ubuf(buf[m++]).i; + h_dihedral_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_dihedral_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_dihedral_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_dihedral_atom4(nlocal,k) = (tagint) ubuf(buf[m++]).i; + } + + h_num_improper(nlocal) = (int) ubuf(buf[m++]).i; + for (k = 0; k < h_num_improper(nlocal); k++) { + h_improper_type(nlocal,k) = (int) ubuf(buf[m++]).i; + h_improper_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_improper_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_improper_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i; + h_improper_atom4(nlocal,k) = (tagint) ubuf(buf[m++]).i; + } + + h_nspecial(nlocal,0) = h_nspecial(nlocal,1) = h_nspecial(nlocal,2) = 0; + + double **extra = atom->extra; + if (atom->nextra_store) { + int size = static_cast<int> (ubuf(buf[m++]).i) - m; + for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++]; + } + + atom->nlocal++; + return m; +} + +/* ---------------------------------------------------------------------- + create one atom of itype at coord + set other values to defaults +------------------------------------------------------------------------- */ + +void 
AtomVecFullKokkos::create_atom(int itype, double *coord) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) { + atomKK->modified(Host,ALL_MASK); + grow(0); + } + atomKK->modified(Host,ALL_MASK); + + tag[nlocal] = 0; + type[nlocal] = itype; + h_x(nlocal,0) = coord[0]; + h_x(nlocal,1) = coord[1]; + h_x(nlocal,2) = coord[2]; + h_mask(nlocal) = 1; + h_image(nlocal) = ((imageint) IMGMAX << IMG2BITS) | + ((imageint) IMGMAX << IMGBITS) | IMGMAX; + h_v(nlocal,0) = 0.0; + h_v(nlocal,1) = 0.0; + h_v(nlocal,2) = 0.0; + + h_q(nlocal) = 0.0; + h_molecule(nlocal) = 0; + h_num_bond(nlocal) = 0; + h_num_angle(nlocal) = 0; + h_num_dihedral(nlocal) = 0; + h_num_improper(nlocal) = 0; + h_nspecial(nlocal,0) = h_nspecial(nlocal,1) = h_nspecial(nlocal,2) = 0; + + atom->nlocal++; +} + +/* ---------------------------------------------------------------------- + unpack one line from Atoms section of data file + initialize other atom quantities +------------------------------------------------------------------------- */ + +void AtomVecFullKokkos::data_atom(double *coord, imageint imagetmp, + char **values) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + + h_tag(nlocal) = atoi(values[0]); + if (h_tag(nlocal) <= 0) + error->one(FLERR,"Invalid atom ID in Atoms section of data file"); + + h_molecule(nlocal) = atoi(values[1]); + if (h_molecule(nlocal) <= 0) + error->one(FLERR,"Invalid molecule ID in Atoms section of data file"); + + h_type(nlocal) = atoi(values[2]); + if (h_type(nlocal) <= 0 || h_type(nlocal) > atom->ntypes) + error->one(FLERR,"Invalid atom type in Atoms section of data file"); + + h_q(nlocal) = atof(values[3]); + + h_x(nlocal,0) = coord[0]; + h_x(nlocal,1) = coord[1]; + h_x(nlocal,2) = coord[2]; + + h_image(nlocal) = imagetmp; + + h_mask(nlocal) = 1; + h_v(nlocal,0) = 0.0; + h_v(nlocal,1) = 0.0; + h_v(nlocal,2) = 0.0; + h_num_bond(nlocal) = 0; + h_num_angle(nlocal) = 0; + h_num_dihedral(nlocal) = 0; + h_num_improper(nlocal) = 0; + + atom->nlocal++; +} + +/* 
---------------------------------------------------------------------- + unpack hybrid quantities from one line in Atoms section of data file + initialize other atom quantities for this sub-style +------------------------------------------------------------------------- */ + +int AtomVecFullKokkos::data_atom_hybrid(int nlocal, char **values) +{ + h_molecule(nlocal) = atoi(values[0]); + h_q(nlocal) = atof(values[1]); + h_num_bond(nlocal) = 0; + h_num_angle(nlocal) = 0; + h_num_dihedral(nlocal) = 0; + h_num_improper(nlocal) = 0; + return 2; +} + +/* ---------------------------------------------------------------------- + pack atom info for data file including 3 image flags +------------------------------------------------------------------------- */ + +void AtomVecFullKokkos::pack_data(double **buf) +{ + int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) { + buf[i][0] = h_tag(i); + buf[i][1] = h_molecule(i); + buf[i][2] = h_type(i); + buf[i][3] = h_q(i); + buf[i][4] = h_x(i,0); + buf[i][5] = h_x(i,1); + buf[i][6] = h_x(i,2); + buf[i][7] = (h_image[i] & IMGMASK) - IMGMAX; + buf[i][8] = (h_image[i] >> IMGBITS & IMGMASK) - IMGMAX; + buf[i][9] = (h_image[i] >> IMG2BITS) - IMGMAX; + } +} + +/* ---------------------------------------------------------------------- + pack hybrid atom info for data file +------------------------------------------------------------------------- */ + +int AtomVecFullKokkos::pack_data_hybrid(int i, double *buf) +{ + buf[0] = h_molecule(i); + buf[1] = h_q(i); + return 2; +} + +/* ---------------------------------------------------------------------- + write atom info to data file including 3 image flags +------------------------------------------------------------------------- */ + +void AtomVecFullKokkos::write_data(FILE *fp, int n, double **buf) +{ + for (int i = 0; i < n; i++) + fprintf(fp,"%d %d %d %-1.16e %-1.16e %-1.16e %-1.16e %d %d %d\n", + (int) buf[i][0],(int) buf[i][1], (int) buf[i][2], buf[i][3], + 
buf[i][4],buf[i][5],buf[i][6], + (int) buf[i][7],(int) buf[i][8],(int) buf[i][9]); +} + +/* ---------------------------------------------------------------------- + write hybrid atom info to data file +------------------------------------------------------------------------- */ + +int AtomVecFullKokkos::write_data_hybrid(FILE *fp, double *buf) +{ + fprintf(fp," " TAGINT_FORMAT " %-1.16e",(tagint) ubuf(buf[0]).i,buf[1]); + return 2; +} + +/* ---------------------------------------------------------------------- + return # of bytes of allocated memory +------------------------------------------------------------------------- */ + +bigint AtomVecFullKokkos::memory_usage() +{ + bigint bytes = 0; + + if (atom->memcheck("tag")) bytes += memory->usage(tag,nmax); + if (atom->memcheck("type")) bytes += memory->usage(type,nmax); + if (atom->memcheck("mask")) bytes += memory->usage(mask,nmax); + if (atom->memcheck("image")) bytes += memory->usage(image,nmax); + if (atom->memcheck("x")) bytes += memory->usage(x,nmax,3); + if (atom->memcheck("v")) bytes += memory->usage(v,nmax,3); + if (atom->memcheck("f")) bytes += memory->usage(f,nmax*commKK->nthreads,3); + + if (atom->memcheck("q")) bytes += memory->usage(q,nmax); + if (atom->memcheck("molecule")) bytes += memory->usage(molecule,nmax); + if (atom->memcheck("nspecial")) bytes += memory->usage(nspecial,nmax,3); + if (atom->memcheck("special")) + bytes += memory->usage(special,nmax,atom->maxspecial); + + if (atom->memcheck("num_bond")) bytes += memory->usage(num_bond,nmax); + if (atom->memcheck("bond_type")) + bytes += memory->usage(bond_type,nmax,atom->bond_per_atom); + if (atom->memcheck("bond_atom")) + bytes += memory->usage(bond_atom,nmax,atom->bond_per_atom); + + if (atom->memcheck("num_angle")) bytes += memory->usage(num_angle,nmax); + if (atom->memcheck("angle_type")) + bytes += memory->usage(angle_type,nmax,atom->angle_per_atom); + if (atom->memcheck("angle_atom1")) + bytes += 
memory->usage(angle_atom1,nmax,atom->angle_per_atom);
+  if (atom->memcheck("angle_atom2"))
+    bytes += memory->usage(angle_atom2,nmax,atom->angle_per_atom);
+  if (atom->memcheck("angle_atom3"))
+    bytes += memory->usage(angle_atom3,nmax,atom->angle_per_atom);
+
+  if (atom->memcheck("num_dihedral")) bytes += memory->usage(num_dihedral,nmax);
+  if (atom->memcheck("dihedral_type"))
+    bytes += memory->usage(dihedral_type,nmax,atom->dihedral_per_atom);
+  if (atom->memcheck("dihedral_atom1"))
+    bytes += memory->usage(dihedral_atom1,nmax,atom->dihedral_per_atom);
+  if (atom->memcheck("dihedral_atom2"))
+    bytes += memory->usage(dihedral_atom2,nmax,atom->dihedral_per_atom);
+  if (atom->memcheck("dihedral_atom3"))
+    bytes += memory->usage(dihedral_atom3,nmax,atom->dihedral_per_atom);
+  if (atom->memcheck("dihedral_atom4"))
+    bytes += memory->usage(dihedral_atom4,nmax,atom->dihedral_per_atom);
+  if (atom->memcheck("num_improper")) bytes += memory->usage(num_improper,nmax);
+  if (atom->memcheck("improper_type"))
+    bytes += memory->usage(improper_type,nmax,atom->improper_per_atom);
+  if (atom->memcheck("improper_atom1"))
+    bytes += memory->usage(improper_atom1,nmax,atom->improper_per_atom);
+  if (atom->memcheck("improper_atom2"))
+    bytes += memory->usage(improper_atom2,nmax,atom->improper_per_atom);
+  if (atom->memcheck("improper_atom3"))
+    bytes += memory->usage(improper_atom3,nmax,atom->improper_per_atom);
+  if (atom->memcheck("improper_atom4"))
+    bytes += memory->usage(improper_atom4,nmax,atom->improper_per_atom);
+
+  return bytes;
+}
+
+/* sync(): for every per-atom array whose bit is set in mask, call sync<> on its DualView for the requested space */
+
+void AtomVecFullKokkos::sync(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {  // sync<LMPDeviceType>; every test below is a bitwise AND with one *_MASK flag
+    if (mask & X_MASK) atomKK->k_x.sync<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>();
+    if (mask & Q_MASK) atomKK->k_q.sync<LMPDeviceType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.sync<LMPDeviceType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.sync<LMPDeviceType>();
+      atomKK->k_special.sync<LMPDeviceType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.sync<LMPDeviceType>();
+      atomKK->k_bond_type.sync<LMPDeviceType>();
+      atomKK->k_bond_atom.sync<LMPDeviceType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.sync<LMPDeviceType>();
+      atomKK->k_angle_type.sync<LMPDeviceType>();
+      atomKK->k_angle_atom1.sync<LMPDeviceType>();
+      atomKK->k_angle_atom2.sync<LMPDeviceType>();
+      atomKK->k_angle_atom3.sync<LMPDeviceType>();
+    }
+    if (mask & DIHEDRAL_MASK) {
+      atomKK->k_num_dihedral.sync<LMPDeviceType>();
+      atomKK->k_dihedral_type.sync<LMPDeviceType>();
+      atomKK->k_dihedral_atom1.sync<LMPDeviceType>();
+      atomKK->k_dihedral_atom2.sync<LMPDeviceType>();
+      atomKK->k_dihedral_atom3.sync<LMPDeviceType>();
+      atomKK->k_dihedral_atom4.sync<LMPDeviceType>();
+    }
+    if (mask & IMPROPER_MASK) {  // all four improper atom arrays, atom1..atom4
+      atomKK->k_num_improper.sync<LMPDeviceType>();
+      atomKK->k_improper_type.sync<LMPDeviceType>();
+      atomKK->k_improper_atom1.sync<LMPDeviceType>();
+      atomKK->k_improper_atom2.sync<LMPDeviceType>();
+      atomKK->k_improper_atom3.sync<LMPDeviceType>();
+      atomKK->k_improper_atom4.sync<LMPDeviceType>();
+    }
+  } else {  // any other space value: sync<LMPHostType>, same per-array selection
+    if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>();
+    if (mask & Q_MASK) atomKK->k_q.sync<LMPHostType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.sync<LMPHostType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.sync<LMPHostType>();
+      atomKK->k_special.sync<LMPHostType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.sync<LMPHostType>();
+      atomKK->k_bond_type.sync<LMPHostType>();
+      atomKK->k_bond_atom.sync<LMPHostType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.sync<LMPHostType>();
+      atomKK->k_angle_type.sync<LMPHostType>();
+      atomKK->k_angle_atom1.sync<LMPHostType>();
+      atomKK->k_angle_atom2.sync<LMPHostType>();
+      atomKK->k_angle_atom3.sync<LMPHostType>();
+    }
+    if (mask & DIHEDRAL_MASK) {
+      atomKK->k_num_dihedral.sync<LMPHostType>();
+      atomKK->k_dihedral_type.sync<LMPHostType>();
+      atomKK->k_dihedral_atom1.sync<LMPHostType>();
+      atomKK->k_dihedral_atom2.sync<LMPHostType>();
+      atomKK->k_dihedral_atom3.sync<LMPHostType>();
+      atomKK->k_dihedral_atom4.sync<LMPHostType>();
+    }
+    if (mask & IMPROPER_MASK) {  // all four improper atom arrays, atom1..atom4
+      atomKK->k_num_improper.sync<LMPHostType>();
+      atomKK->k_improper_type.sync<LMPHostType>();
+      atomKK->k_improper_atom1.sync<LMPHostType>();
+      atomKK->k_improper_atom2.sync<LMPHostType>();
+      atomKK->k_improper_atom3.sync<LMPHostType>();
+      atomKK->k_improper_atom4.sync<LMPHostType>();
+    }
+  }
+}
+
+/* modified(): for every per-atom array whose bit is set in mask, call modify<> on its DualView for the requested space */
+
+void AtomVecFullKokkos::modified(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {  // modify<LMPDeviceType>; every test below is a bitwise AND with one *_MASK flag
+    if (mask & X_MASK) atomKK->k_x.modify<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>();
+    if (mask & Q_MASK) atomKK->k_q.modify<LMPDeviceType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.modify<LMPDeviceType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.modify<LMPDeviceType>();
+      atomKK->k_special.modify<LMPDeviceType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.modify<LMPDeviceType>();
+      atomKK->k_bond_type.modify<LMPDeviceType>();
+      atomKK->k_bond_atom.modify<LMPDeviceType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.modify<LMPDeviceType>();
+      atomKK->k_angle_type.modify<LMPDeviceType>();
+      atomKK->k_angle_atom1.modify<LMPDeviceType>();
+      atomKK->k_angle_atom2.modify<LMPDeviceType>();
+      atomKK->k_angle_atom3.modify<LMPDeviceType>();
+    }
+    if (mask & DIHEDRAL_MASK) {
+      atomKK->k_num_dihedral.modify<LMPDeviceType>();
+      atomKK->k_dihedral_type.modify<LMPDeviceType>();
+      atomKK->k_dihedral_atom1.modify<LMPDeviceType>();
+      atomKK->k_dihedral_atom2.modify<LMPDeviceType>();
+      atomKK->k_dihedral_atom3.modify<LMPDeviceType>();
+      atomKK->k_dihedral_atom4.modify<LMPDeviceType>();
+    }
+    if (mask & IMPROPER_MASK) {  // all four improper atom arrays, atom1..atom4
+      atomKK->k_num_improper.modify<LMPDeviceType>();
+      atomKK->k_improper_type.modify<LMPDeviceType>();
+      atomKK->k_improper_atom1.modify<LMPDeviceType>();
+      atomKK->k_improper_atom2.modify<LMPDeviceType>();
+      atomKK->k_improper_atom3.modify<LMPDeviceType>();
+      atomKK->k_improper_atom4.modify<LMPDeviceType>();
+    }
+  } else {  // any other space value: modify<LMPHostType>, same per-array selection
+    if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>();
+    if (mask & Q_MASK) atomKK->k_q.modify<LMPHostType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.modify<LMPHostType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.modify<LMPHostType>();
+      atomKK->k_special.modify<LMPHostType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.modify<LMPHostType>();
+      atomKK->k_bond_type.modify<LMPHostType>();
+      atomKK->k_bond_atom.modify<LMPHostType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.modify<LMPHostType>();
+      atomKK->k_angle_type.modify<LMPHostType>();
+      atomKK->k_angle_atom1.modify<LMPHostType>();
+      atomKK->k_angle_atom2.modify<LMPHostType>();
+      atomKK->k_angle_atom3.modify<LMPHostType>();
+    }
+    if (mask & DIHEDRAL_MASK) {
+      atomKK->k_num_dihedral.modify<LMPHostType>();
+      atomKK->k_dihedral_type.modify<LMPHostType>();
+      atomKK->k_dihedral_atom1.modify<LMPHostType>();
+      atomKK->k_dihedral_atom2.modify<LMPHostType>();
+      atomKK->k_dihedral_atom3.modify<LMPHostType>();
+      atomKK->k_dihedral_atom4.modify<LMPHostType>();
+    }
+    if (mask & IMPROPER_MASK) {  // all four improper atom arrays, atom1..atom4
+      atomKK->k_num_improper.modify<LMPHostType>();
+      atomKK->k_improper_type.modify<LMPHostType>();
+      atomKK->k_improper_atom1.modify<LMPHostType>();
+      atomKK->k_improper_atom2.modify<LMPHostType>();
+      atomKK->k_improper_atom3.modify<LMPHostType>();
+      atomKK->k_improper_atom4.modify<LMPHostType>();
+    }
+  }
+}
diff --git a/src/KOKKOS/atom_vec_full_kokkos.h b/src/KOKKOS/atom_vec_full_kokkos.h
new file mode 100644
index 0000000000..43290bb219
--- /dev/null
+++ b/src/KOKKOS/atom_vec_full_kokkos.h
@@ -0,0 +1,183 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */ + +#ifdef ATOM_CLASS + +AtomStyle(full/kk,AtomVecFullKokkos) + +#else + +#ifndef LMP_ATOM_VEC_FULL_KOKKOS_H +#define LMP_ATOM_VEC_FULL_KOKKOS_H + +#include "atom_vec_kokkos.h" + +namespace LAMMPS_NS { + +class AtomVecFullKokkos : public AtomVecKokkos { + public: + AtomVecFullKokkos(class LAMMPS *); + virtual ~AtomVecFullKokkos() {} + void grow(int); + void copy(int, int, int); + int pack_comm(int, int *, double *, int, int *); + int pack_comm_vel(int, int *, double *, int, int *); + void unpack_comm(int, int, double *); + void unpack_comm_vel(int, int, double *); + int pack_reverse(int, int, double *); + void unpack_reverse(int, int *, double *); + int pack_border(int, int *, double *, int, int *); + int pack_border_vel(int, int *, double *, int, int *); + int pack_border_hybrid(int, int *, double *); + void unpack_border(int, int, double *); + void unpack_border_vel(int, int, double *); + int unpack_border_hybrid(int, int, double *); + int pack_exchange(int, double *); + int unpack_exchange(double *); + int size_restart(); + int pack_restart(int, double *); + int unpack_restart(double *); + void create_atom(int, double *); + void data_atom(double *, tagint, char **); + int data_atom_hybrid(int, char **); + void pack_data(double **); + int pack_data_hybrid(int, double *); + void write_data(FILE *, int, double **); + int write_data_hybrid(FILE *, double *); + bigint memory_usage(); + + void grow_reset(); + int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, const int pbc[]); + void unpack_comm_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf); + int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, + const int & iswap, const int nfirst, + const int &pbc_flag, const int pbc[]); + int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, + 
DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space); + void unpack_border_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf, + ExecutionSpace space); + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space, int dim, + X_FLOAT lo, X_FLOAT hi); + int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, + int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, + ExecutionSpace space); + + void sync(ExecutionSpace space, unsigned int mask); + void modified(ExecutionSpace space, unsigned int mask); + + protected: + + tagint *tag; + int *type,*mask; + imageint *image; + double **x,**v,**f; + + double *q; + + tagint *molecule; + int **nspecial; + tagint **special; + int *num_bond; + int **bond_type; + tagint **bond_atom; + + int *num_angle; + int **angle_type; + tagint **angle_atom1,**angle_atom2,**angle_atom3; + + int *num_dihedral; + int **dihedral_type; + tagint **dihedral_atom1,**dihedral_atom2,**dihedral_atom3,**dihedral_atom4; + int *num_improper; + int **improper_type; + tagint **improper_atom1,**improper_atom2,**improper_atom3,**improper_atom4; + + DAT::t_tagint_1d d_tag; + DAT::t_int_1d d_type, d_mask; + HAT::t_tagint_1d h_tag; + HAT::t_int_1d h_type, h_mask; + + DAT::t_imageint_1d d_image; + HAT::t_imageint_1d h_image; + + DAT::t_x_array d_x; + DAT::t_v_array d_v; + DAT::t_f_array d_f; + HAT::t_x_array h_x; + HAT::t_v_array h_v; + HAT::t_f_array h_f; + + DAT::t_float_1d d_q; + HAT::t_float_1d h_q; + + DAT::t_tagint_1d d_molecule; + DAT::t_int_2d d_nspecial; + DAT::t_tagint_2d d_special; + DAT::t_int_1d d_num_bond; + DAT::t_int_2d d_bond_type; + DAT::t_tagint_2d d_bond_atom; + + HAT::t_tagint_1d h_molecule; + HAT::t_int_2d h_nspecial; + HAT::t_tagint_2d h_special; + HAT::t_int_1d h_num_bond; + HAT::t_int_2d h_bond_type; + HAT::t_tagint_2d h_bond_atom; + + DAT::t_int_1d d_num_angle; + DAT::t_int_2d 
d_angle_type; + DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3; + + HAT::t_int_1d h_num_angle; + HAT::t_int_2d h_angle_type; + HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3; + + DAT::t_int_1d d_num_dihedral; + DAT::t_int_2d d_dihedral_type; + DAT::t_tagint_2d d_dihedral_atom1,d_dihedral_atom2, + d_dihedral_atom3,d_dihedral_atom4; + DAT::t_int_1d d_num_improper; + DAT::t_int_2d d_improper_type; + DAT::t_tagint_2d d_improper_atom1,d_improper_atom2, + d_improper_atom3,d_improper_atom4; + + HAT::t_int_1d h_num_dihedral; + HAT::t_int_2d h_dihedral_type; + HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2, + h_dihedral_atom3,h_dihedral_atom4; + HAT::t_int_1d h_num_improper; + HAT::t_int_2d h_improper_type; + HAT::t_tagint_2d h_improper_atom1,h_improper_atom2, + h_improper_atom3,h_improper_atom4; + + HAT::tdual_int_1d k_count; + +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index ac651b0b5a..e555f587e5 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -24,43 +24,43 @@ class AtomVecKokkos : public AtomVec { AtomVecKokkos(class LAMMPS *); virtual ~AtomVecKokkos() {} - virtual void sync(ExecutionSpace space, unsigned int mask) {}; - virtual void modified(ExecutionSpace space, unsigned int mask) {}; + virtual void sync(ExecutionSpace space, unsigned int mask) = 0; + virtual void modified(ExecutionSpace space, unsigned int mask) = 0; virtual int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, const int nfirst, - const int &pbc_flag, const int pbc[]) - {return 0;} + const int &pbc_flag, const int pbc[]) = 0; + //{return 0;} virtual int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list, const int & iswap, const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, const int pbc[]) - {return 0;} + const int &pbc_flag, const int pbc[]) = 0; + //{return 0;} virtual void unpack_comm_kokkos(const int 
&n, const int &nfirst, - const DAT::tdual_xfloat_2d &buf) {}; + const DAT::tdual_xfloat_2d &buf) = 0; virtual int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, - int pbc_flag, int *pbc, ExecutionSpace space) - {return 0;}; + int pbc_flag, int *pbc, ExecutionSpace space) = 0; + //{return 0;}; virtual void unpack_border_kokkos(const int &n, const int &nfirst, const DAT::tdual_xfloat_2d &buf, - ExecutionSpace space) {}; + ExecutionSpace space) = 0; virtual int pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) - {return 0;}; + ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) = 0; + //{return 0;}; virtual int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space) - {return 0;}; + ExecutionSpace space) = 0; + //{return 0;}; protected: class AtomKokkos *atomKK; diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp new file mode 100644 index 0000000000..b75023cd37 --- /dev/null +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -0,0 +1,2236 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include "stdlib.h" +#include "atom_vec_molecular_kokkos.h" +#include "atom_kokkos.h" +#include "comm_kokkos.h" +#include "domain.h" +#include "modify.h" +#include "fix.h" +#include "atom_masks.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define DELTA 10000 + +/* ---------------------------------------------------------------------- */ + +AtomVecMolecularKokkos::AtomVecMolecularKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) +{ + molecular = 1; + bonds_allow = angles_allow = dihedrals_allow = impropers_allow = 1; + mass_type = 1; + + comm_x_only = comm_f_only = 1; + size_forward = 3; + size_reverse = 3; + size_border = 7; + size_velocity = 3; + size_data_atom = 6; + size_data_vel = 4; + xcol_data = 4; + + atom->molecule_flag = 1; + + k_count = DAT::tdual_int_1d("atom::k_count",1); + atomKK = (AtomKokkos *) atom; + commKK = (CommKokkos *) comm; +} + +/* ---------------------------------------------------------------------- + grow atom arrays + n = 0 grows arrays by DELTA + n > 0 allocates arrays to size n +------------------------------------------------------------------------- */ + +void AtomVecMolecularKokkos::grow(int n) +{ + if (n == 0) nmax += DELTA; + else nmax = n; + atomKK->nmax = nmax; + if (nmax < 0 || nmax > MAXSMALLINT) + error->one(FLERR,"Per-processor system is too big"); + + sync(Device,ALL_MASK); + modified(Device,ALL_MASK); + + memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); + memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); + memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); + memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); + + memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); + memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); + memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + + 
memory->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); + memory->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial"); + memory->grow_kokkos(atomKK->k_special,atomKK->special,nmax,atomKK->maxspecial, + "atom:special"); + memory->grow_kokkos(atomKK->k_num_bond,atomKK->num_bond,nmax,"atom:num_bond"); + memory->grow_kokkos(atomKK->k_bond_type,atomKK->bond_type,nmax,atomKK->bond_per_atom, + "atom:bond_type"); + memory->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom, + "atom:bond_atom"); + + memory->grow_kokkos(atomKK->k_num_angle,atomKK->num_angle,nmax,"atom:num_angle"); + memory->grow_kokkos(atomKK->k_angle_type,atomKK->angle_type,nmax,atomKK->angle_per_atom, + "atom:angle_type"); + memory->grow_kokkos(atomKK->k_angle_atom1,atomKK->angle_atom1,nmax,atomKK->angle_per_atom, + "atom:angle_atom1"); + memory->grow_kokkos(atomKK->k_angle_atom2,atomKK->angle_atom2,nmax,atomKK->angle_per_atom, + "atom:angle_atom2"); + memory->grow_kokkos(atomKK->k_angle_atom3,atomKK->angle_atom3,nmax,atomKK->angle_per_atom, + "atom:angle_atom3"); + + memory->grow_kokkos(atomKK->k_num_dihedral,atomKK->num_dihedral,nmax,"atom:num_dihedral"); + memory->grow_kokkos(atomKK->k_dihedral_type,atomKK->dihedral_type,nmax, + atomKK->dihedral_per_atom,"atom:dihedral_type"); + memory->grow_kokkos(atomKK->k_dihedral_atom1,atomKK->dihedral_atom1,nmax, + atomKK->dihedral_per_atom,"atom:dihedral_atom1"); + memory->grow_kokkos(atomKK->k_dihedral_atom2,atomKK->dihedral_atom2,nmax, + atomKK->dihedral_per_atom,"atom:dihedral_atom2"); + memory->grow_kokkos(atomKK->k_dihedral_atom3,atomKK->dihedral_atom3,nmax, + atomKK->dihedral_per_atom,"atom:dihedral_atom3"); + memory->grow_kokkos(atomKK->k_dihedral_atom4,atomKK->dihedral_atom4,nmax, + atomKK->dihedral_per_atom,"atom:dihedral_atom4"); + + memory->grow_kokkos(atomKK->k_num_improper,atomKK->num_improper,nmax,"atom:num_improper"); + 
memory->grow_kokkos(atomKK->k_improper_type,atomKK->improper_type,nmax, + atomKK->improper_per_atom,"atom:improper_type"); + memory->grow_kokkos(atomKK->k_improper_atom1,atomKK->improper_atom1,nmax, + atomKK->improper_per_atom,"atom:improper_atom1"); + memory->grow_kokkos(atomKK->k_improper_atom2,atomKK->improper_atom2,nmax, + atomKK->improper_per_atom,"atom:improper_atom2"); + memory->grow_kokkos(atomKK->k_improper_atom3,atomKK->improper_atom3,nmax, + atomKK->improper_per_atom,"atom:improper_atom3"); + memory->grow_kokkos(atomKK->k_improper_atom4,atomKK->improper_atom4,nmax, + atomKK->improper_per_atom,"atom:improper_atom4"); + + grow_reset(); + sync(Host,ALL_MASK); + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); +} + +/* ---------------------------------------------------------------------- + reset local array ptrs +------------------------------------------------------------------------- */ + +void AtomVecMolecularKokkos::grow_reset() +{ + tag = atomKK->tag; + d_tag = atomKK->k_tag.d_view; + h_tag = atomKK->k_tag.h_view; + + type = atomKK->type; + d_type = atomKK->k_type.d_view; + h_type = atomKK->k_type.h_view; + mask = atomKK->mask; + d_mask = atomKK->k_mask.d_view; + h_mask = atomKK->k_mask.h_view; + image = atomKK->image; + d_image = atomKK->k_image.d_view; + h_image = atomKK->k_image.h_view; + + x = atomKK->x; + d_x = atomKK->k_x.d_view; + h_x = atomKK->k_x.h_view; + v = atomKK->v; + d_v = atomKK->k_v.d_view; + h_v = atomKK->k_v.h_view; + f = atomKK->f; + d_f = atomKK->k_f.d_view; + h_f = atomKK->k_f.h_view; + + molecule = atomKK->molecule; + d_molecule = atomKK->k_molecule.d_view; + h_molecule = atomKK->k_molecule.h_view; + nspecial = atomKK->nspecial; + d_nspecial = atomKK->k_nspecial.d_view; + h_nspecial = atomKK->k_nspecial.h_view; + special = atomKK->special; + d_special = atomKK->k_special.d_view; + h_special = atomKK->k_special.h_view; + num_bond = 
atomKK->num_bond; + d_num_bond = atomKK->k_num_bond.d_view; + h_num_bond = atomKK->k_num_bond.h_view; + bond_type = atomKK->bond_type; + d_bond_type = atomKK->k_bond_type.d_view; + h_bond_type = atomKK->k_bond_type.h_view; + bond_atom = atomKK->bond_atom; + d_bond_atom = atomKK->k_bond_atom.d_view; + h_bond_atom = atomKK->k_bond_atom.h_view; + + num_angle = atomKK->num_angle; + d_num_angle = atomKK->k_num_angle.d_view; + h_num_angle = atomKK->k_num_angle.h_view; + angle_type = atomKK->angle_type; + d_angle_type = atomKK->k_angle_type.d_view; + h_angle_type = atomKK->k_angle_type.h_view; + angle_atom1 = atomKK->angle_atom1; + d_angle_atom1 = atomKK->k_angle_atom1.d_view; + h_angle_atom1 = atomKK->k_angle_atom1.h_view; + angle_atom2 = atomKK->angle_atom2; + d_angle_atom2 = atomKK->k_angle_atom2.d_view; + h_angle_atom2 = atomKK->k_angle_atom2.h_view; + angle_atom3 = atomKK->angle_atom3; + d_angle_atom3 = atomKK->k_angle_atom3.d_view; + h_angle_atom3 = atomKK->k_angle_atom3.h_view; + + num_dihedral = atomKK->num_dihedral; + d_num_dihedral = atomKK->k_num_dihedral.d_view; + h_num_dihedral = atomKK->k_num_dihedral.h_view; + dihedral_type = atomKK->dihedral_type; + d_dihedral_type = atomKK->k_dihedral_type.d_view; + h_dihedral_type = atomKK->k_dihedral_type.h_view; + dihedral_atom1 = atomKK->dihedral_atom1; + d_dihedral_atom1 = atomKK->k_dihedral_atom1.d_view; + h_dihedral_atom1 = atomKK->k_dihedral_atom1.h_view; + dihedral_atom2 = atomKK->dihedral_atom2; + d_dihedral_atom2 = atomKK->k_dihedral_atom2.d_view; + h_dihedral_atom2 = atomKK->k_dihedral_atom2.h_view; + dihedral_atom3 = atomKK->dihedral_atom3; + d_dihedral_atom3 = atomKK->k_dihedral_atom3.d_view; + h_dihedral_atom3 = atomKK->k_dihedral_atom3.h_view; + dihedral_atom4 = atomKK->dihedral_atom4; + d_dihedral_atom4 = atomKK->k_dihedral_atom4.d_view; + h_dihedral_atom4 = atomKK->k_dihedral_atom4.h_view; + + num_improper = atomKK->num_improper; + d_num_improper = atomKK->k_num_improper.d_view; + h_num_improper = 
atomKK->k_num_improper.h_view; + improper_type = atomKK->improper_type; + d_improper_type = atomKK->k_improper_type.d_view; + h_improper_type = atomKK->k_improper_type.h_view; + improper_atom1 = atomKK->improper_atom1; + d_improper_atom1 = atomKK->k_improper_atom1.d_view; + h_improper_atom1 = atomKK->k_improper_atom1.h_view; + improper_atom2 = atomKK->improper_atom2; + d_improper_atom2 = atomKK->k_improper_atom2.d_view; + h_improper_atom2 = atomKK->k_improper_atom2.h_view; + improper_atom3 = atomKK->improper_atom3; + d_improper_atom3 = atomKK->k_improper_atom3.d_view; + h_improper_atom3 = atomKK->k_improper_atom3.h_view; + improper_atom4 = atomKK->improper_atom4; + d_improper_atom4 = atomKK->k_improper_atom4.d_view; + h_improper_atom4 = atomKK->k_improper_atom4.h_view; +} + +/* ---------------------------------------------------------------------- + copy atom I info to atom J +------------------------------------------------------------------------- */ + +void AtomVecMolecularKokkos::copy(int i, int j, int delflag) +{ + int k; + + h_tag[j] = h_tag[i]; + h_type[j] = h_type[i]; + mask[j] = mask[i]; + h_image[j] = h_image[i]; + h_x(j,0) = h_x(i,0); + h_x(j,1) = h_x(i,1); + h_x(j,2) = h_x(i,2); + h_v(j,0) = h_v(i,0); + h_v(j,1) = h_v(i,1); + h_v(j,2) = h_v(i,2); + + h_molecule(j) = h_molecule(i); + + h_num_bond(j) = h_num_bond(i); + for (k = 0; k < h_num_bond(j); k++) { + h_bond_type(j,k) = h_bond_type(i,k); + h_bond_atom(j,k) = h_bond_atom(i,k); + } + + h_nspecial(j,0) = h_nspecial(i,0); + h_nspecial(j,1) = h_nspecial(i,1); + h_nspecial(j,2) = h_nspecial(i,2); + for (k = 0; k < h_nspecial(j,2); k++) + h_special(j,k) = h_special(i,k); + + h_num_angle(j) = h_num_angle(i); + for (k = 0; k < h_num_angle(j); k++) { + h_angle_type(j,k) = h_angle_type(i,k); + h_angle_atom1(j,k) = h_angle_atom1(i,k); + h_angle_atom2(j,k) = h_angle_atom2(i,k); + h_angle_atom3(j,k) = h_angle_atom3(i,k); + } + + h_num_dihedral(j) = h_num_dihedral(i); + for (k = 0; k < h_num_dihedral(j); k++) { + 
h_dihedral_type(j,k) = h_dihedral_type(i,k); + h_dihedral_atom1(j,k) = h_dihedral_atom1(i,k); + h_dihedral_atom2(j,k) = h_dihedral_atom2(i,k); + h_dihedral_atom3(j,k) = h_dihedral_atom3(i,k); + h_dihedral_atom4(j,k) = h_dihedral_atom4(i,k); + } + + h_num_improper(j) = h_num_improper(i); + for (k = 0; k < h_num_improper(j); k++) { + h_improper_type(j,k) = h_improper_type(i,k); + h_improper_atom1(j,k) = h_improper_atom1(i,k); + h_improper_atom2(j,k) = h_improper_atom2(i,k); + h_improper_atom3(j,k) = h_improper_atom3(i,k); + h_improper_atom4(j,k) = h_improper_atom4(i,k); + } + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag); +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG,int TRICLINIC> +struct AtomVecMolecularKokkos_PackComm { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf; + typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecMolecularKokkos_PackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_xfloat_2d &buf, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + const size_t maxsend = (buf.view<DeviceType>().dimension_0() + *buf.view<DeviceType>().dimension_1())/3; + const size_t elements = 3; + buffer_view<DeviceType>(_buf,buf,maxsend,elements); + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + 
KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n, + const DAT::tdual_int_2d &list, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, + const int* const pbc) +{ + // Check whether to always run forward communication on the host + // Choose correct forward PackComm kernel + + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecMolecularKokkos_PackComm<LMPHostType,1,1> + f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecMolecularKokkos_PackComm<LMPHostType,1,0> + f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecMolecularKokkos_PackComm<LMPHostType,0,1> + f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecMolecularKokkos_PackComm<LMPHostType,0,0> + f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPHostType::fence(); + } else { + sync(Device,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct 
AtomVecMolecularKokkos_PackComm<LMPDeviceType,1,1> + f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,1,0> + f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,0,1> + f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,0,0> + f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPDeviceType::fence(); + } + + return n*size_forward; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG,int TRICLINIC> +struct AtomVecMolecularKokkos_PackCommSelf { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + typename ArrayTypes<DeviceType>::t_x_array _xw; + int _nfirst; + typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecMolecularKokkos_PackCommSelf( + const typename DAT::tdual_x_array &x, + const int &nfirst, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst), + _list(list.view<DeviceType>()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; 
_pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _xw(i+_nfirst,0) = _x(j,0); + _xw(i+_nfirst,1) = _x(j,1); + _xw(i+_nfirst,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, + const int & iswap, + const int nfirst, const int &pbc_flag, + const int* const pbc) { + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + modified(Host,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecMolecularKokkos_PackCommSelf<LMPHostType,1,1> + f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecMolecularKokkos_PackCommSelf<LMPHostType,1,0> + f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecMolecularKokkos_PackCommSelf<LMPHostType,0,1> + f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecMolecularKokkos_PackCommSelf<LMPHostType,0,0> + f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPHostType::fence(); + } else { + sync(Device,X_MASK); + modified(Device,X_MASK); + if(pbc_flag) 
{ + if(domain->triclinic) { + struct AtomVecMolecularKokkos_PackCommSelf<LMPDeviceType,1,1> + f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecMolecularKokkos_PackCommSelf<LMPDeviceType,1,0> + f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecMolecularKokkos_PackCommSelf<LMPDeviceType,0,1> + f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecMolecularKokkos_PackCommSelf<LMPDeviceType,0,0> + f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPDeviceType::fence(); + } + /* three position components per packed atom */ return n*3; +} + +/* ---------------------------------------------------------------------- */ + +/* Functor that copies the three components of row i of _buf into row i+_first of _x; both views are taken from the dual views for DeviceType. */ template<class DeviceType> +struct AtomVecMolecularKokkos_UnpackComm { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array _x; + typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf; + int _first; + + AtomVecMolecularKokkos_UnpackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_xfloat_2d &buf, + const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()), + _first(first) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + } +}; + +/* ---------------------------------------------------------------------- */ + +/* Runs the UnpackComm functor over n buffer rows, writing positions starting at atom index first; executes on the host when commKK->forward_comm_on_host is set, otherwise on the device. */ void AtomVecMolecularKokkos::unpack_comm_kokkos(const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf ) { + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + modified(Host,X_MASK); + struct 
AtomVecMolecularKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first); + Kokkos::parallel_for(n,f); + /* fence the space the functor actually ran in (host), matching pack_comm_self */ LMPHostType::fence(); + } else { + sync(Device,X_MASK); + modified(Device,X_MASK); + struct AtomVecMolecularKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } +} + +/* ---------------------------------------------------------------------- */ + +/* Host-side pack of n atom positions: for each i the atom j = list[i] contributes three doubles to buf, shifted by (dx,dy,dz) computed from pbc and the domain when pbc_flag is nonzero. Returns the number of doubles written. */ int AtomVecMolecularKokkos::pack_comm(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + } + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +/* Same as pack_comm, but every atom also contributes its three velocity components (six doubles per atom); when deform_vremap is set, a velocity shift derived from h_rate is added for atoms selected by deform_groupbit. Returns the number of doubles written. */ int AtomVecMolecularKokkos::pack_comm_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + 
buf[m++] = h_x(j,2) + dz; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + /* test the group bit of the atom being packed (j = list[i]), not of the loop counter i */ if (mask[j] & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } + } + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +/* Host-side unpack of n positions: reads three doubles per atom from buf into h_x for atom indices first .. first+n-1. */ void AtomVecMolecularKokkos::unpack_comm(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +/* Host-side unpack of n positions and velocities: six doubles per atom, position first, for atom indices first .. first+n-1. */ void AtomVecMolecularKokkos::unpack_comm_vel(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +/* Packs the forces of atoms first .. first+n-1 into buf (three doubles each) after syncing F_MASK to the host when n > 0; returns the number of doubles written. */ int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf) +{ + if(n > 0) + sync(Host,F_MASK); + + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + buf[m++] = h_f(i,0); + buf[m++] = h_f(i,1); + buf[m++] = h_f(i,2); + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +/* Adds the forces stored in buf (three doubles per entry) onto the atoms named by list, marking F_MASK as modified on the host when n > 0. */ void AtomVecMolecularKokkos::unpack_reverse(int n, int *list, double *buf) +{ + if(n > 0) + modified(Host,F_MASK); + + int m = 0; + for (int i = 0; i < n; i++) { + const int j = list[i]; + h_f(j,0) += buf[m++]; + h_f(j,1) += buf[m++]; + 
h_f(j,2) += buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +/* Functor that packs one border atom per buffer row: row i receives the position of atom j = _list(_iswap,i) (shifted by _dx,_dy,_dz when PBC_FLAG != 0) followed by its tag, type, mask and molecule id, seven columns in total. The integer fields are stored by plain assignment into the X_FLOAT buffer. */ template<class DeviceType,int PBC_FLAG> +struct AtomVecMolecularKokkos_PackBorder { + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + + typename AT::t_xfloat_2d _buf; + const typename AT::t_int_2d_const _list; + const int _iswap; + const typename AT::t_x_array_randomread _x; + const typename AT::t_tagint_1d _tag; + const typename AT::t_int_1d _type; + const typename AT::t_int_1d _mask; + const typename AT::t_tagint_1d _molecule; + X_FLOAT _dx,_dy,_dz; + + AtomVecMolecularKokkos_PackBorder( + const typename AT::t_xfloat_2d &buf, + const typename AT::t_int_2d_const &list, + const int & iswap, + const typename AT::t_x_array &x, + const typename AT::t_tagint_1d &tag, + const typename AT::t_int_1d &type, + const typename AT::t_int_1d &mask, + const typename AT::t_tagint_1d &molecule, + const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): + _buf(buf),_list(list),_iswap(iswap), + _x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule), + _dx(dx),_dy(dy),_dz(dz) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + _buf(i,3) = _tag(j); + _buf(i,4) = _type(j); + _buf(i,5) = _mask(j); + _buf(i,6) = _molecule(j); + } else { + _buf(i,0) = _x(j,0) + _dx; + _buf(i,1) = _x(j,1) + _dy; + _buf(i,2) = _x(j,2) + _dz; + _buf(i,3) = _tag(j); + _buf(i,4) = _type(j); + _buf(i,5) = _mask(j); + _buf(i,6) = _molecule(j); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +/* Computes the periodic shift (dx,dy,dz) from pbc and the domain, then runs the PackBorder functor over n entries of swap iswap in the requested execution space; returns n*size_border. */ int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, + DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space) +{ + X_FLOAT dx,dy,dz; + + if (pbc_flag != 0) { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = 
pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + /* triclinic: the raw pbc flags are used as the shift, as in pack_border below */ dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if(space==Host) { + AtomVecMolecularKokkos_PackBorder<LMPHostType,1> f( + buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(), + iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); + } else { + AtomVecMolecularKokkos_PackBorder<LMPDeviceType,1> f( + buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(), + iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } + + } else { + /* no periodic shift: the PBC_FLAG=0 functor ignores dx,dy,dz */ dx = dy = dz = 0; + if(space==Host) { + AtomVecMolecularKokkos_PackBorder<LMPHostType,0> f( + buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(), + iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); + } else { + AtomVecMolecularKokkos_PackBorder<LMPDeviceType,0> f( + buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(), + iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } + } + return n*size_border; +} + +/* ---------------------------------------------------------------------- */ + +/* Host-side border pack: for each atom j = list[i] writes position (shifted when pbc_flag is nonzero), then tag, type, mask and molecule id encoded through ubuf, seven doubles per atom; afterwards lets every fix listed in atom->extra_border append its own data. Returns the number of doubles written. */ int AtomVecMolecularKokkos::pack_border(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = ubuf(h_molecule(j)).d; + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = 
ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = ubuf(h_molecule(j)).d; + } + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +/* Host-side border pack including velocities: per atom j = list[i] writes position (shifted when pbc_flag is nonzero), ubuf-encoded tag, type, mask and molecule id, then three velocity components (ten doubles per atom). With deform_vremap set, atoms selected by deform_groupbit get the h_rate-derived velocity shift. Fixes listed in atom->extra_border append their data afterwards. Returns the number of doubles written. */ int AtomVecMolecularKokkos::pack_border_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = ubuf(h_molecule(j)).d; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = ubuf(h_molecule(j)).d; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = ubuf(h_molecule(j)).d; + /* test the group bit of the atom being packed (j = list[i]), not of the loop counter i */ if (mask[j] & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = 
h_v(j,2); + } + } + } + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +/* Packs only the molecule id of each atom j = list[i], one double per atom, and returns the count. The value is encoded through ubuf so that it matches the ubuf(...).i decode in unpack_border_hybrid and the encoding used by pack_border. */ int AtomVecMolecularKokkos::pack_border_hybrid(int n, int *list, double *buf) +{ + int i,j,m; + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = ubuf(h_molecule(j)).d; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +/* Functor that unpacks one border atom per buffer row: columns 0-2 of row i go to the position of atom i+_first, columns 3-6 to tag, type, mask and molecule id. Values are converted numerically, mirroring the plain assignment done by the PackBorder functor. */ template<class DeviceType> +struct AtomVecMolecularKokkos_UnpackBorder { + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + + const typename AT::t_xfloat_2d_const _buf; + typename AT::t_x_array _x; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_tagint_1d _molecule; + int _first; + + + AtomVecMolecularKokkos_UnpackBorder( + const typename AT::t_xfloat_2d_const &buf, + typename AT::t_x_array &x, + typename AT::t_tagint_1d &tag, + typename AT::t_int_1d &type, + typename AT::t_int_1d &mask, + typename AT::t_tagint_1d &molecule, + const int& first): + _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule), + _first(first){ + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + /* tag and molecule are tagint views: cast to tagint so ids are not narrowed through int */ _tag(i+_first) = static_cast<tagint> (_buf(i,3)); + _type(i+_first) = static_cast<int> (_buf(i,4)); + _mask(i+_first) = static_cast<int> (_buf(i,5)); + _molecule(i+_first) = static_cast<tagint> (_buf(i,6)); + + } +}; + +/* ---------------------------------------------------------------------- */ + +/* Grows the per-atom arrays until index first+n fits below nmax, marks the unpacked fields as modified in the given space, and runs the UnpackBorder functor over n buffer rows in that space. */ void AtomVecMolecularKokkos::unpack_border_kokkos(const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf, + ExecutionSpace space) { + modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + while (first+n >= nmax) 
grow(0); + modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + if(space==Host) { + struct AtomVecMolecularKokkos_UnpackBorder<LMPHostType> + f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); + } else { + struct AtomVecMolecularKokkos_UnpackBorder<LMPDeviceType> + f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_molecule,first); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } +} + +/* ---------------------------------------------------------------------- */ + +/* Host-side border unpack: for atom indices first .. first+n-1 reads position plus ubuf-encoded tag, type, mask and molecule id (seven doubles per atom), growing the arrays when index i reaches nmax; afterwards lets every fix listed in atom->extra_border unpack its own data. The modified() call sits inside the loop and is therefore repeated for every atom. */ void AtomVecMolecularKokkos::unpack_border(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + if (i == nmax) grow(0); + modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag(i) = (tagint) ubuf(buf[m++]).i; + h_type(i) = (int) ubuf(buf[m++]).i; + h_mask(i) = (int) ubuf(buf[m++]).i; + h_molecule(i) = (tagint) ubuf(buf[m++]).i; + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); +} + +/* ---------------------------------------------------------------------- */ + +/* Host-side border unpack including velocities (ten doubles per atom). NOTE(review): unlike unpack_border, this function makes no modified(Host,...) call - confirm whether that is intended. */ void AtomVecMolecularKokkos::unpack_border_vel(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + if (i == nmax) grow(0); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag(i) = (tagint) ubuf(buf[m++]).i; + h_type(i) = (int) ubuf(buf[m++]).i; + h_mask(i) = (int) ubuf(buf[m++]).i; + h_molecule(i) = (tagint) ubuf(buf[m++]).i; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); +} + +/* 
---------------------------------------------------------------------- */ + +/* Reads one ubuf-encoded molecule id per atom from buf into h_molecule for atom indices first .. first+n-1 and returns the number of doubles consumed. */ int AtomVecMolecularKokkos::unpack_border_hybrid(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) + h_molecule(i) = (tagint) ubuf(buf[m++]).i; + return m; +} + +/* ---------------------------------------------------------------------- */ + +/* Functor used by pack_exchange_kokkos: for each send slot it serializes all per-atom data of atom _sendlist(mysend) into one row of _buf and, when _copylist(mysend) is not negative, overwrites that atom's slot with the data of the atom named by the copy list. Each field is held twice: as a *_randomread view for reading and as a writable view with a trailing w in its name. */ template<class DeviceType> +struct AtomVecMolecularKokkos_PackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + typename AT::t_x_array_randomread _x; + typename AT::t_v_array_randomread _v; + typename AT::t_tagint_1d_randomread _tag; + typename AT::t_int_1d_randomread _type; + typename AT::t_int_1d_randomread _mask; + typename AT::t_imageint_1d_randomread _image; + typename AT::t_tagint_1d_randomread _molecule; + typename AT::t_int_2d_randomread _nspecial; + typename AT::t_tagint_2d_randomread _special; + typename AT::t_int_1d_randomread _num_bond; + typename AT::t_int_2d_randomread _bond_type; + typename AT::t_tagint_2d_randomread _bond_atom; + typename AT::t_int_1d_randomread _num_angle; + typename AT::t_int_2d_randomread _angle_type; + typename AT::t_tagint_2d_randomread _angle_atom1,_angle_atom2,_angle_atom3; + typename AT::t_int_1d_randomread _num_dihedral; + typename AT::t_int_2d_randomread _dihedral_type; + typename AT::t_tagint_2d_randomread _dihedral_atom1,_dihedral_atom2, + _dihedral_atom3,_dihedral_atom4; + typename AT::t_int_1d_randomread _num_improper; + typename AT::t_int_2d_randomread _improper_type; + typename AT::t_tagint_2d_randomread _improper_atom1,_improper_atom2, + _improper_atom3,_improper_atom4; + /* writable counterparts of the views above */ typename AT::t_x_array _xw; + typename AT::t_v_array _vw; + typename AT::t_tagint_1d _tagw; + typename AT::t_int_1d _typew; + typename AT::t_int_1d _maskw; + typename AT::t_imageint_1d _imagew; + typename AT::t_tagint_1d _moleculew; + typename AT::t_int_2d _nspecialw; + typename AT::t_tagint_2d _specialw; + typename AT::t_int_1d 
_num_bondw; + typename AT::t_int_2d _bond_typew; + typename AT::t_tagint_2d _bond_atomw; + typename AT::t_int_1d _num_anglew; + typename AT::t_int_2d _angle_typew; + typename AT::t_tagint_2d _angle_atom1w,_angle_atom2w,_angle_atom3w; + typename AT::t_int_1d _num_dihedralw; + typename AT::t_int_2d _dihedral_typew; + typename AT::t_tagint_2d _dihedral_atom1w,_dihedral_atom2w, + _dihedral_atom3w,_dihedral_atom4w; + typename AT::t_int_1d _num_improperw; + typename AT::t_int_2d _improper_typew; + typename AT::t_tagint_2d _improper_atom1w,_improper_atom2w, + _improper_atom3w,_improper_atom4w; + /* exchange buffer view; bound in the constructor body through buffer_view() */ typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d_const _sendlist; + typename AT::t_int_1d_const _copylist; + int _nlocal,_dim; + X_FLOAT _lo,_hi; + /* number of buffer columns reserved per atom; computed in the constructor body */ size_t elements; + + /* Takes every per-atom view from the AtomKokkos dual views for DeviceType and stores nlocal, dim, lo and hi unchanged. */ AtomVecMolecularKokkos_PackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d sendlist, + typename AT::tdual_int_1d copylist,int nlocal, int dim, + X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view<DeviceType>()), + _v(atom->k_v.view<DeviceType>()), + _tag(atom->k_tag.view<DeviceType>()), + _type(atom->k_type.view<DeviceType>()), + _mask(atom->k_mask.view<DeviceType>()), + _image(atom->k_image.view<DeviceType>()), + _molecule(atom->k_molecule.view<DeviceType>()), + _nspecial(atom->k_nspecial.view<DeviceType>()), + _special(atom->k_special.view<DeviceType>()), + _num_bond(atom->k_num_bond.view<DeviceType>()), + _bond_type(atom->k_bond_type.view<DeviceType>()), + _bond_atom(atom->k_bond_atom.view<DeviceType>()), + _num_angle(atom->k_num_angle.view<DeviceType>()), + _angle_type(atom->k_angle_type.view<DeviceType>()), + _angle_atom1(atom->k_angle_atom1.view<DeviceType>()), + _angle_atom2(atom->k_angle_atom2.view<DeviceType>()), + _angle_atom3(atom->k_angle_atom3.view<DeviceType>()), + _num_dihedral(atom->k_num_dihedral.view<DeviceType>()), + _dihedral_type(atom->k_dihedral_type.view<DeviceType>()), + 
_dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()), + _dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()), + _dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()), + _dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()), + _num_improper(atom->k_num_improper.view<DeviceType>()), + _improper_type(atom->k_improper_type.view<DeviceType>()), + _improper_atom1(atom->k_improper_atom1.view<DeviceType>()), + _improper_atom2(atom->k_improper_atom2.view<DeviceType>()), + _improper_atom3(atom->k_improper_atom3.view<DeviceType>()), + _improper_atom4(atom->k_improper_atom4.view<DeviceType>()), + /* the members ending in w are initialized from the same atom->k_* dual views as the read-only members above */ _xw(atom->k_x.view<DeviceType>()), + _vw(atom->k_v.view<DeviceType>()), + _tagw(atom->k_tag.view<DeviceType>()), + _typew(atom->k_type.view<DeviceType>()), + _maskw(atom->k_mask.view<DeviceType>()), + _imagew(atom->k_image.view<DeviceType>()), + _moleculew(atom->k_molecule.view<DeviceType>()), + _nspecialw(atom->k_nspecial.view<DeviceType>()), + _specialw(atom->k_special.view<DeviceType>()), + _num_bondw(atom->k_num_bond.view<DeviceType>()), + _bond_typew(atom->k_bond_type.view<DeviceType>()), + _bond_atomw(atom->k_bond_atom.view<DeviceType>()), + _num_anglew(atom->k_num_angle.view<DeviceType>()), + _angle_typew(atom->k_angle_type.view<DeviceType>()), + _angle_atom1w(atom->k_angle_atom1.view<DeviceType>()), + _angle_atom2w(atom->k_angle_atom2.view<DeviceType>()), + _angle_atom3w(atom->k_angle_atom3.view<DeviceType>()), + _num_dihedralw(atom->k_num_dihedral.view<DeviceType>()), + _dihedral_typew(atom->k_dihedral_type.view<DeviceType>()), + _dihedral_atom1w(atom->k_dihedral_atom1.view<DeviceType>()), + _dihedral_atom2w(atom->k_dihedral_atom2.view<DeviceType>()), + _dihedral_atom3w(atom->k_dihedral_atom3.view<DeviceType>()), + _dihedral_atom4w(atom->k_dihedral_atom4.view<DeviceType>()), + _num_improperw(atom->k_num_improper.view<DeviceType>()), + _improper_typew(atom->k_improper_type.view<DeviceType>()), + _improper_atom1w(atom->k_improper_atom1.view<DeviceType>()), 
+ _improper_atom2w(atom->k_improper_atom2.view<DeviceType>()), + _improper_atom3w(atom->k_improper_atom3.view<DeviceType>()), + _improper_atom4w(atom->k_improper_atom4.view<DeviceType>()), + /* send and copy lists are read through const views of the dual views passed in */ _sendlist(sendlist.template view<DeviceType>()), + _copylist(copylist.template view<DeviceType>()), + _nlocal(nlocal),_dim(dim), + _lo(lo),_hi(hi){ + // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, + // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, + // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, + // and angle_atom3 + // 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom + // 1 num_improper, 5*improper_per_atom + // 1 to store buffer length + elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ + 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; + const int maxsendlist = (buf.template view<DeviceType>().dimension_0()* + buf.template view<DeviceType>().dimension_1())/elements; + buffer_view<DeviceType>(_buf,buf,maxsendlist,elements); + } + + /* Serializes atom i = _sendlist(mysend) into row mysend of _buf: column 0 holds elements, followed by x, v, tag, type, mask, image, molecule, then the bond, angle, dihedral and improper lists (each preceded by its count) and finally nspecial and the special list. */ KOKKOS_INLINE_FUNCTION + void operator() (const int &mysend) const { + int k; + const int i = _sendlist(mysend); + _buf(mysend,0) = elements; + int m = 1; + _buf(mysend,m++) = _x(i,0); + _buf(mysend,m++) = _x(i,1); + _buf(mysend,m++) = _x(i,2); + _buf(mysend,m++) = _v(i,0); + _buf(mysend,m++) = _v(i,1); + _buf(mysend,m++) = _v(i,2); + _buf(mysend,m++) = _tag(i); + _buf(mysend,m++) = _type(i); + _buf(mysend,m++) = _mask(i); + _buf(mysend,m++) = _image(i); + _buf(mysend,m++) = _molecule(i); + _buf(mysend,m++) = _num_bond(i); + for (k = 0; k < _num_bond(i); k++) { + _buf(mysend,m++) = _bond_type(i,k); + _buf(mysend,m++) = _bond_atom(i,k); + } + _buf(mysend,m++) = _num_angle(i); + for (k = 0; k < _num_angle(i); k++) { + _buf(mysend,m++) = _angle_type(i,k); + _buf(mysend,m++) = _angle_atom1(i,k); + _buf(mysend,m++) = _angle_atom2(i,k); + _buf(mysend,m++) = _angle_atom3(i,k); + } + 
_buf(mysend,m++) = _num_dihedral(i); + for (k = 0; k < _num_dihedral(i); k++) { + _buf(mysend,m++) = _dihedral_type(i,k); + _buf(mysend,m++) = _dihedral_atom1(i,k); + _buf(mysend,m++) = _dihedral_atom2(i,k); + _buf(mysend,m++) = _dihedral_atom3(i,k); + _buf(mysend,m++) = _dihedral_atom4(i,k); + } + _buf(mysend,m++) = _num_improper(i); + for (k = 0; k < _num_improper(i); k++) { + _buf(mysend,m++) = _improper_type(i,k); + _buf(mysend,m++) = _improper_atom1(i,k); + _buf(mysend,m++) = _improper_atom2(i,k); + _buf(mysend,m++) = _improper_atom3(i,k); + _buf(mysend,m++) = _improper_atom4(i,k); + } + + /* the special list is written up to the count stored in _nspecial(i,2) */ _buf(mysend,m++) = _nspecial(i,0); + _buf(mysend,m++) = _nspecial(i,1); + _buf(mysend,m++) = _nspecial(i,2); + for (k = 0; k < _nspecial(i,2); k++) + _buf(mysend,m++) = _special(i,k); + + const int j = _copylist(mysend); + + /* when a replacement atom j is given (j > -1), copy all of its per-atom data into slot i through the writable views */ if(j>-1) { + _xw(i,0) = _x(j,0); + _xw(i,1) = _x(j,1); + _xw(i,2) = _x(j,2); + _vw(i,0) = _v(j,0); + _vw(i,1) = _v(j,1); + _vw(i,2) = _v(j,2); + _tagw(i) = _tag(j); + _typew(i) = _type(j); + _maskw(i) = _mask(j); + _imagew(i) = _image(j); + _moleculew(i) = _molecule(j); + _num_bondw(i) = _num_bond(j); + for (k = 0; k < _num_bond(j); k++) { + _bond_typew(i,k) = _bond_type(j,k); + _bond_atomw(i,k) = _bond_atom(j,k); + } + _num_anglew(i) = _num_angle(j); + for (k = 0; k < _num_angle(j); k++) { + _angle_typew(i,k) = _angle_type(j,k); + _angle_atom1w(i,k) = _angle_atom1(j,k); + _angle_atom2w(i,k) = _angle_atom2(j,k); + _angle_atom3w(i,k) = _angle_atom3(j,k); + } + _num_dihedralw(i) = _num_dihedral(j); + for (k = 0; k < _num_dihedral(j); k++) { + _dihedral_typew(i,k) = _dihedral_type(j,k); + _dihedral_atom1w(i,k) = _dihedral_atom1(j,k); + _dihedral_atom2w(i,k) = _dihedral_atom2(j,k); + _dihedral_atom3w(i,k) = _dihedral_atom3(j,k); + _dihedral_atom4w(i,k) = _dihedral_atom4(j,k); + } + _num_improperw(i) = _num_improper(j); + for (k = 0; k < _num_improper(j); k++) { + _improper_typew(i,k) = _improper_type(j,k); + _improper_atom1w(i,k) = _improper_atom1(j,k); + 
_improper_atom2w(i,k) = _improper_atom2(j,k); + _improper_atom3w(i,k) = _improper_atom3(j,k); + _improper_atom4w(i,k) = _improper_atom4(j,k); + } + _nspecialw(i,0) = _nspecial(j,0); + _nspecialw(i,1) = _nspecial(j,1); + _nspecialw(i,2) = _nspecial(j,2); + for (k = 0; k < _nspecial(j,2); k++) + _specialw(i,k) = _special(j,k); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +/* Resizes k_buf when it cannot hold nsend records of elements columns each, then runs the PackExchange functor over nsend send slots in the requested execution space and returns nsend*elements. */ int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space,int dim,X_FLOAT lo, + X_FLOAT hi ) +{ + /* same per-atom column count as computed inside the functor constructor */ const int elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ + 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; + if(nsend > (int) (k_buf.view<LMPHostType>().dimension_0()* + k_buf.view<LMPHostType>().dimension_1())/elements) { + int newsize = nsend*elements/k_buf.view<LMPHostType>().dimension_1()+1; + k_buf.resize(newsize,k_buf.view<LMPHostType>().dimension_1()); + } + if(space == Host) { + AtomVecMolecularKokkos_PackExchangeFunctor<LMPHostType> + f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + LMPHostType::fence(); + return nsend*elements; + } else { + AtomVecMolecularKokkos_PackExchangeFunctor<LMPDeviceType> + f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + LMPDeviceType::fence(); + return nsend*elements; + } +} + +/* ---------------------------------------------------------------------- */ + +/* Host-side serialization of atom i into buf, starting at index 1: x, v, then ubuf-encoded tag, type, mask, image and molecule id, followed by the bond, angle, dihedral, improper and special lists. */ int AtomVecMolecularKokkos::pack_exchange(int i, double *buf) +{ + int k; + int m = 1; + buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + buf[m++] = ubuf(h_tag(i)).d; + buf[m++] = ubuf(h_type(i)).d; + buf[m++] = ubuf(h_mask(i)).d; + buf[m++] = ubuf(h_image(i)).d; + buf[m++] = ubuf(h_molecule(i)).d; + + buf[m++] = 
ubuf(h_num_bond(i)).d; + for (k = 0; k < h_num_bond(i); k++) { + buf[m++] = ubuf(h_bond_type(i,k)).d; + buf[m++] = ubuf(h_bond_atom(i,k)).d; + } + buf[m++] = ubuf(h_num_angle(i)).d; + for (k = 0; k < h_num_angle(i); k++) { + buf[m++] = ubuf(h_angle_type(i,k)).d; + buf[m++] = ubuf(h_angle_atom1(i,k)).d; + buf[m++] = ubuf(h_angle_atom2(i,k)).d; + buf[m++] = ubuf(h_angle_atom3(i,k)).d; + } + buf[m++] = ubuf(h_num_dihedral(i)).d; + for (k = 0; k < h_num_dihedral(i); k++) { + buf[m++] = ubuf(h_dihedral_type(i,k)).d; + buf[m++] = ubuf(h_dihedral_atom1(i,k)).d; + buf[m++] = ubuf(h_dihedral_atom2(i,k)).d; + buf[m++] = ubuf(h_dihedral_atom3(i,k)).d; + buf[m++] = ubuf(h_dihedral_atom4(i,k)).d; + } + buf[m++] = ubuf(h_num_improper(i)).d; + for (k = 0; k < h_num_improper(i); k++) { + buf[m++] = ubuf(h_improper_type(i,k)).d; + buf[m++] = ubuf(h_improper_atom1(i,k)).d; + buf[m++] = ubuf(h_improper_atom2(i,k)).d; + buf[m++] = ubuf(h_improper_atom3(i,k)).d; + buf[m++] = ubuf(h_improper_atom4(i,k)).d; + } + buf[m++] = ubuf(h_nspecial(i,0)).d; + buf[m++] = ubuf(h_nspecial(i,1)).d; + buf[m++] = ubuf(h_nspecial(i,2)).d; + for (k = 0; k < h_nspecial(i,2); k++) + buf[m++] = ubuf(h_special(i,k)).d; + + /* fixes listed in atom->extra_grow append their own per-atom data */ if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]); + + /* slot 0 receives the total number of doubles used by this atom, which is also the return value */ buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- */ + +/* Functor used by unpack_exchange_kokkos: reads one atom record per buffer row and appends it to the per-atom arrays when its coordinate along _dim lies within [_lo,_hi). All views are writable views taken from the AtomKokkos dual views for DeviceType. */ template<class DeviceType> +struct AtomVecMolecularKokkos_UnpackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + typename AT::t_x_array _x; + typename AT::t_v_array _v; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_imageint_1d _image; + typename AT::t_tagint_1d _molecule; + typename AT::t_int_2d _nspecial; + typename AT::t_tagint_2d _special; + typename AT::t_int_1d _num_bond; + typename AT::t_int_2d _bond_type; + 
typename AT::t_tagint_2d _bond_atom; + typename AT::t_int_1d _num_angle; + typename AT::t_int_2d _angle_type; + typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3; + typename AT::t_int_1d _num_dihedral; + typename AT::t_int_2d _dihedral_type; + typename AT::t_tagint_2d _dihedral_atom1,_dihedral_atom2, + _dihedral_atom3,_dihedral_atom4; + typename AT::t_int_1d _num_improper; + typename AT::t_int_2d _improper_type; + typename AT::t_tagint_2d _improper_atom1,_improper_atom2, + _improper_atom3,_improper_atom4; + + /* exchange buffer view; bound in the constructor body through buffer_view() */ typename AT::t_xfloat_2d_um _buf; + /* one-element counter view; element 0 is incremented atomically in operator() to reserve the next atom index */ typename AT::t_int_1d _nlocal; + int _dim; + X_FLOAT _lo,_hi; + size_t elements; + + /* Takes every per-atom view and the counter view from the corresponding dual views for DeviceType and stores dim, lo and hi unchanged. */ AtomVecMolecularKokkos_UnpackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + int dim, X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view<DeviceType>()), + _v(atom->k_v.view<DeviceType>()), + _tag(atom->k_tag.view<DeviceType>()), + _type(atom->k_type.view<DeviceType>()), + _mask(atom->k_mask.view<DeviceType>()), + _image(atom->k_image.view<DeviceType>()), + _molecule(atom->k_molecule.view<DeviceType>()), + _nspecial(atom->k_nspecial.view<DeviceType>()), + _special(atom->k_special.view<DeviceType>()), + _num_bond(atom->k_num_bond.view<DeviceType>()), + _bond_type(atom->k_bond_type.view<DeviceType>()), + _bond_atom(atom->k_bond_atom.view<DeviceType>()), + _num_angle(atom->k_num_angle.view<DeviceType>()), + _angle_type(atom->k_angle_type.view<DeviceType>()), + _angle_atom1(atom->k_angle_atom1.view<DeviceType>()), + _angle_atom2(atom->k_angle_atom2.view<DeviceType>()), + _angle_atom3(atom->k_angle_atom3.view<DeviceType>()), + _num_dihedral(atom->k_num_dihedral.view<DeviceType>()), + _dihedral_type(atom->k_dihedral_type.view<DeviceType>()), + _dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()), + _dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()), + _dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()), + 
_dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()), + _num_improper(atom->k_num_improper.view<DeviceType>()), + _improper_type(atom->k_improper_type.view<DeviceType>()), + _improper_atom1(atom->k_improper_atom1.view<DeviceType>()), + _improper_atom2(atom->k_improper_atom2.view<DeviceType>()), + _improper_atom3(atom->k_improper_atom3.view<DeviceType>()), + _improper_atom4(atom->k_improper_atom4.view<DeviceType>()), + _nlocal(nlocal.template view<DeviceType>()),_dim(dim), + _lo(lo),_hi(hi){ + + /* same per-atom column count as used by the pack side */ elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ + 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; + const int maxsendlist = (buf.template view<DeviceType>().dimension_0()* + buf.template view<DeviceType>().dimension_1())/elements; + buffer_view<DeviceType>(_buf,buf,maxsendlist,elements); + } + + /* Accepts record myrecv only if its position component along _dim (buffer column _dim+1) lies in [_lo,_hi); the destination atom index i is reserved with an atomic fetch-and-add on _nlocal(0), after which the fields are read back in the order the pack functor wrote them. */ KOKKOS_INLINE_FUNCTION + void operator() (const int &myrecv) const { + X_FLOAT x = _buf(myrecv,_dim+1); + if (x >= _lo && x < _hi) { + int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + int m = 1; + _x(i,0) = _buf(myrecv,m++); + _x(i,1) = _buf(myrecv,m++); + _x(i,2) = _buf(myrecv,m++); + _v(i,0) = _buf(myrecv,m++); + _v(i,1) = _buf(myrecv,m++); + _v(i,2) = _buf(myrecv,m++); + _tag(i) = _buf(myrecv,m++); + _type(i) = _buf(myrecv,m++); + _mask(i) = _buf(myrecv,m++); + _image(i) = _buf(myrecv,m++); + + _molecule(i) = _buf(myrecv,m++); + _num_bond(i) = _buf(myrecv,m++); + int k; + for (k = 0; k < _num_bond(i); k++) { + _bond_type(i,k) = _buf(myrecv,m++); + _bond_atom(i,k) = _buf(myrecv,m++); + } + _num_angle(i) = _buf(myrecv,m++); + for (k = 0; k < _num_angle(i); k++) { + _angle_type(i,k) = _buf(myrecv,m++); + _angle_atom1(i,k) = _buf(myrecv,m++); + _angle_atom2(i,k) = _buf(myrecv,m++); + _angle_atom3(i,k) = _buf(myrecv,m++); + } + _num_dihedral(i) = _buf(myrecv,m++); + for (k = 0; k < _num_dihedral(i); k++) { + _dihedral_type(i,k) = _buf(myrecv,m++); + _dihedral_atom1(i,k) = _buf(myrecv,m++); + _dihedral_atom2(i,k) = _buf(myrecv,m++); + 
_dihedral_atom3(i,k) = _buf(myrecv,m++);
+        _dihedral_atom4(i,k) = _buf(myrecv,m++);
+      }
+      _num_improper(i) = _buf(myrecv,m++);
+      for (k = 0; k < _num_improper(i); k++) {
+        _improper_type(i,k) = _buf(myrecv,m++);
+        _improper_atom1(i,k) = _buf(myrecv,m++);
+        _improper_atom2(i,k) = _buf(myrecv,m++);
+        _improper_atom3(i,k) = _buf(myrecv,m++);
+        _improper_atom4(i,k) = _buf(myrecv,m++);
+      }
+      _nspecial(i,0) = _buf(myrecv,m++);
+      _nspecial(i,1) = _buf(myrecv,m++);
+      _nspecial(i,2) = _buf(myrecv,m++);
+      for (k = 0; k < _nspecial(i,2); k++)
+        _special(i,k) = _buf(myrecv,m++);
+    }
+  }
+};
+
+/* unpack exchanged atoms from k_buf on the given space; return updated nlocal */
+
+int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
+                                                   int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
+                                                   ExecutionSpace space) {
+  const size_t elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
+    5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
+  if(space == Host) {
+    k_count.h_view(0) = nlocal;  // functor atomically increments this counter
+    AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPHostType>
+      f(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/elements,f);
+    LMPHostType::fence();
+    return k_count.h_view(0);
+  } else {
+    k_count.h_view(0) = nlocal;
+    k_count.modify<LMPHostType>();
+    k_count.sync<LMPDeviceType>();
+    AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPDeviceType>
+      f(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/elements,f);
+    LMPDeviceType::fence();
+    k_count.modify<LMPDeviceType>();
+    k_count.sync<LMPHostType>();
+
+    return k_count.h_view(0);
+  }
+}
+
+/* unpack one atom from buf into host slot nlocal; return index past last value read */
+
+int AtomVecMolecularKokkos::unpack_exchange(double *buf)
+{
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) grow(0);
+  modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+           MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
+           ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
+
+  int k;
+  int m = 1;  // buf[0] is skipped; unpacking starts at index 1
+  h_x(nlocal,0) = buf[m++];
+  h_x(nlocal,1) = buf[m++];
+  h_x(nlocal,2) = buf[m++];
+  h_v(nlocal,0) = buf[m++];
+  h_v(nlocal,1) = buf[m++];
+  h_v(nlocal,2) = buf[m++];
+  h_tag(nlocal) = (tagint) ubuf(buf[m++]).i;
+  h_type(nlocal) = (int) ubuf(buf[m++]).i;
+  h_mask(nlocal) = (int) ubuf(buf[m++]).i;
+  h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
+  h_molecule(nlocal) = (tagint) ubuf(buf[m++]).i;
+
+  h_num_bond(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_bond(nlocal); k++) {
+    h_bond_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_bond_atom(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+  h_num_angle(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_angle(nlocal); k++) {
+    h_angle_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_angle_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_angle_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_angle_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+  h_num_dihedral(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_dihedral(nlocal); k++) {
+    h_dihedral_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_dihedral_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_dihedral_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_dihedral_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_dihedral_atom4(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+  h_num_improper(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_improper(nlocal); k++) {
+    h_improper_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_improper_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_improper_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_improper_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_improper_atom4(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+  h_nspecial(nlocal,0) = (int) ubuf(buf[m++]).i;
+  h_nspecial(nlocal,1) = (int) ubuf(buf[m++]).i;
+  h_nspecial(nlocal,2) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_nspecial(nlocal,2); k++)
+    h_special(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      m += modify->fix[atom->extra_grow[iextra]]->
+        unpack_exchange(nlocal,&buf[m]);
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   size of restart data for all atoms owned by this proc
+   include extra data stored by fixes
+------------------------------------------------------------------------- */
+
+int AtomVecMolecularKokkos::size_restart()
+{
+  int i;
+
+  int nlocal = atom->nlocal;
+  int n = 0;
+  for (i = 0; i < nlocal; i++)
+    n += 16 + 2*num_bond[i] + 4*num_angle[i] +
+      5*num_dihedral[i] + 5*num_improper[i];
+
+  if (atom->nextra_restart)
+    for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
+      for (i = 0; i < nlocal; i++)
+        n += modify->fix[atom->extra_restart[iextra]]->size_restart(i);
+
+  return n;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom I's data for restart file including extra quantities
+   xyz must be 1st 3 values, so that read_restart can test on them
+   molecular types may be negative, but write as positive
+------------------------------------------------------------------------- */
+
+int AtomVecMolecularKokkos::pack_restart(int i, double *buf)
+{
+  int m = 1;
+  buf[m++] = h_x(i,0);
+  buf[m++] = h_x(i,1);
+  buf[m++] = h_x(i,2);
+  buf[m++] = ubuf(h_tag(i)).d;
+  buf[m++] = ubuf(h_type(i)).d;
+  buf[m++] = ubuf(h_mask(i)).d;
+  buf[m++] = ubuf(h_image(i)).d;
+  buf[m++] = h_v(i,0);
+  buf[m++] = h_v(i,1);
+  buf[m++] = h_v(i,2);
+
+  buf[m++] = ubuf(h_molecule(i)).d;
+
+  buf[m++] = ubuf(h_num_bond(i)).d;
+  for (int k = 0; k < h_num_bond(i); k++) {
+    buf[m++] = ubuf(MAX(h_bond_type(i,k),-h_bond_type(i,k))).d;
+    buf[m++] = ubuf(h_bond_atom(i,k)).d;
+  }
+
+  buf[m++] = ubuf(h_num_angle(i)).d;
+  for (int k = 0; k < h_num_angle(i); k++) {
+    buf[m++] = ubuf(MAX(h_angle_type(i,k),-h_angle_type(i,k))).d;
+    buf[m++] = ubuf(h_angle_atom1(i,k)).d;
+    buf[m++] = ubuf(h_angle_atom2(i,k)).d;
+    buf[m++] = ubuf(h_angle_atom3(i,k)).d;
+  }
+
+  buf[m++] = ubuf(h_num_dihedral(i)).d;
+  for (int k = 0; k < h_num_dihedral(i); k++) {
+    buf[m++] = ubuf(MAX(h_dihedral_type(i,k),-h_dihedral_type(i,k))).d;
+    buf[m++] = ubuf(h_dihedral_atom1(i,k)).d;
+    buf[m++] = ubuf(h_dihedral_atom2(i,k)).d;
+    buf[m++] = ubuf(h_dihedral_atom3(i,k)).d;
+    buf[m++] = ubuf(h_dihedral_atom4(i,k)).d;
+  }
+
+  buf[m++] = ubuf(h_num_improper(i)).d;
+  for (int k = 0; k < h_num_improper(i); k++) {
+    buf[m++] = ubuf(MAX(h_improper_type(i,k),-h_improper_type(i,k))).d;
+    buf[m++] = ubuf(h_improper_atom1(i,k)).d;
+    buf[m++] = ubuf(h_improper_atom2(i,k)).d;
+    buf[m++] = ubuf(h_improper_atom3(i,k)).d;
+    buf[m++] = ubuf(h_improper_atom4(i,k)).d;
+  }
+
+  if (atom->nextra_restart)
+    for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
+      m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]);
+
+  buf[0] = m;  // total record length, stored as a plain double
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   unpack data for one atom from restart file including extra quantities
+------------------------------------------------------------------------- */
+
+int AtomVecMolecularKokkos::unpack_restart(double *buf)
+{
+  int k;
+
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) {
+    grow(0);
+    if (atom->nextra_store)
+      memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
+  }
+
+  int m = 1;  // NOTE(review): no modified(Host,...) call here, unlike unpack_exchange() - confirm
+  h_x(nlocal,0) = buf[m++];
+  h_x(nlocal,1) = buf[m++];
+  h_x(nlocal,2) = buf[m++];
+  h_tag(nlocal) = (tagint) ubuf(buf[m++]).i;
+  h_type(nlocal) = (int) ubuf(buf[m++]).i;
+  h_mask(nlocal) = (int) ubuf(buf[m++]).i;
+  h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
+  h_v(nlocal,0) = buf[m++];
+  h_v(nlocal,1) = buf[m++];
+  h_v(nlocal,2) = buf[m++];
+
+  h_molecule(nlocal) = (tagint) ubuf(buf[m++]).i;
+
+  h_num_bond(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_bond(nlocal); k++) {
+    h_bond_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_bond_atom(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  h_num_angle(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_angle(nlocal); k++) {
+    h_angle_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_angle_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_angle_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_angle_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  h_num_dihedral(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_dihedral(nlocal); k++) {
+    h_dihedral_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_dihedral_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_dihedral_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_dihedral_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_dihedral_atom4(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  h_num_improper(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_improper(nlocal); k++) {
+    h_improper_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_improper_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_improper_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_improper_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_improper_atom4(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  h_nspecial(nlocal,0) = h_nspecial(nlocal,1) = h_nspecial(nlocal,2) = 0;
+
+  double **extra = atom->extra;
+  if (atom->nextra_store) {
+    int size = static_cast<int> (buf[0]) - m;  // buf[0] = record length set by pack_restart()
+    for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
+  }
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   create one atom of itype at coord
+   set other values to defaults
+------------------------------------------------------------------------- */
+
+void AtomVecMolecularKokkos::create_atom(int itype, double *coord)
+{
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) {
+    atomKK->modified(Host,ALL_MASK);
+    grow(0);
+  }
+  atomKK->modified(Host,ALL_MASK);
+
+  tag[nlocal] = 0;
+  type[nlocal] = itype;
+  h_x(nlocal,0) = coord[0];
+  h_x(nlocal,1) = coord[1];
+  h_x(nlocal,2) = coord[2];
+  h_mask(nlocal) = 1;
+  h_image(nlocal) = ((imageint) IMGMAX << IMG2BITS) |
+    ((imageint) IMGMAX << IMGBITS) | IMGMAX;
+  h_v(nlocal,0) = 0.0;
+  h_v(nlocal,1) = 0.0;
+  h_v(nlocal,2) = 0.0;
+
+  h_molecule(nlocal) = 0;
+  h_num_bond(nlocal) = 0;
+  h_num_angle(nlocal) = 0;
+  h_num_dihedral(nlocal) = 0;
+  h_num_improper(nlocal) = 0;
+  h_nspecial(nlocal,0) = h_nspecial(nlocal,1) = h_nspecial(nlocal,2) = 0;
+
+  atom->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   unpack one line from Atoms section of data file
+   initialize other atom quantities
+------------------------------------------------------------------------- */
+
+void AtomVecMolecularKokkos::data_atom(double *coord, imageint imagetmp,
+                                       char **values)
+{
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) grow(0);  // NOTE(review): no modified(Host,...) here, unlike create_atom() - confirm
+
+  h_tag(nlocal) = atoi(values[0]);
+  if (h_tag(nlocal) <= 0)
+    error->one(FLERR,"Invalid atom ID in Atoms section of data file");
+
+  h_molecule(nlocal) = atoi(values[1]);
+  if (h_molecule(nlocal) <= 0)
+    error->one(FLERR,"Invalid molecule ID in Atoms section of data file");
+
+  h_type(nlocal) = atoi(values[2]);
+  if (h_type(nlocal) <= 0 || h_type(nlocal) > atom->ntypes)
+    error->one(FLERR,"Invalid atom type in Atoms section of data file");
+
+  h_x(nlocal,0) = coord[0];
+  h_x(nlocal,1) = coord[1];
+  h_x(nlocal,2) = coord[2];
+
+  h_image(nlocal) = imagetmp;
+
+  h_mask(nlocal) = 1;
+  h_v(nlocal,0) = 0.0;
+  h_v(nlocal,1) = 0.0;
+  h_v(nlocal,2) = 0.0;
+  h_num_bond(nlocal) = 0;
+  h_num_angle(nlocal) = 0;
+  h_num_dihedral(nlocal) = 0;
+  h_num_improper(nlocal) = 0;
+
+  atom->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   unpack hybrid quantities from one line in Atoms section of data file
+   initialize other atom quantities for this sub-style
+------------------------------------------------------------------------- */
+
+int AtomVecMolecularKokkos::data_atom_hybrid(int nlocal, char **values)
+{
+  h_molecule(nlocal) = atoi(values[0]);
+  h_num_bond(nlocal) = 0;
+  h_num_angle(nlocal) = 0;
+  h_num_dihedral(nlocal) = 0;
+  h_num_improper(nlocal) = 0;
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom info for data file including 3 image flags
+------------------------------------------------------------------------- */
+
+void AtomVecMolecularKokkos::pack_data(double **buf)
+{
+  int nlocal = atom->nlocal;
+  for (int i = 0; i < nlocal; i++) {
+    buf[i][0] = h_tag(i);
+    buf[i][1] = h_molecule(i);
+    buf[i][2] = h_type(i);
+    buf[i][3] = h_x(i,0);
+    buf[i][4] = h_x(i,1);
+    buf[i][5] = h_x(i,2);
+    buf[i][6] = (h_image(i) & IMGMASK) - IMGMAX;
+    buf[i][7] = (h_image(i) >> IMGBITS & IMGMASK) - IMGMAX;
+    buf[i][8] = (h_image(i) >> IMG2BITS) - IMGMAX;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack hybrid atom info for data file
+------------------------------------------------------------------------- */
+
+int AtomVecMolecularKokkos::pack_data_hybrid(int i, double *buf)
+{
+  buf[0] = h_molecule(i);
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   write atom info to data file including 3 image flags
+------------------------------------------------------------------------- */
+
+void AtomVecMolecularKokkos::write_data(FILE *fp, int n, double **buf)
+{
+  for (int i = 0; i < n; i++)
+    fprintf(fp,TAGINT_FORMAT " " TAGINT_FORMAT " %d %-1.16e %-1.16e %-1.16e %d %d %d\n",
+            (tagint) buf[i][0],(tagint) buf[i][1], (int) buf[i][2],  // IDs are tagint, as in write_data_hybrid()
+            buf[i][3],buf[i][4],buf[i][5],
+            (int) buf[i][6],(int) buf[i][7],(int) buf[i][8]);
+}
+
+/* ----------------------------------------------------------------------
+   write hybrid atom info to data file
+------------------------------------------------------------------------- */
+
+int AtomVecMolecularKokkos::write_data_hybrid(FILE *fp, double *buf)
+{
+  fprintf(fp," " TAGINT_FORMAT, (tagint) (buf[0]));
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   return # of bytes
of allocated memory
+------------------------------------------------------------------------- */
+
+bigint AtomVecMolecularKokkos::memory_usage()
+{
+  bigint bytes = 0;
+
+  if (atom->memcheck("tag")) bytes += memory->usage(tag,nmax);
+  if (atom->memcheck("type")) bytes += memory->usage(type,nmax);
+  if (atom->memcheck("mask")) bytes += memory->usage(mask,nmax);
+  if (atom->memcheck("image")) bytes += memory->usage(image,nmax);
+  if (atom->memcheck("x")) bytes += memory->usage(x,nmax,3);
+  if (atom->memcheck("v")) bytes += memory->usage(v,nmax,3);
+  if (atom->memcheck("f")) bytes += memory->usage(f,nmax*commKK->nthreads,3);
+
+  if (atom->memcheck("molecule")) bytes += memory->usage(molecule,nmax);
+  if (atom->memcheck("nspecial")) bytes += memory->usage(nspecial,nmax,3);
+  if (atom->memcheck("special"))
+    bytes += memory->usage(special,nmax,atom->maxspecial);
+
+  if (atom->memcheck("num_bond")) bytes += memory->usage(num_bond,nmax);
+  if (atom->memcheck("bond_type"))
+    bytes += memory->usage(bond_type,nmax,atom->bond_per_atom);
+  if (atom->memcheck("bond_atom"))
+    bytes += memory->usage(bond_atom,nmax,atom->bond_per_atom);
+
+  if (atom->memcheck("num_angle")) bytes += memory->usage(num_angle,nmax);
+  if (atom->memcheck("angle_type"))
+    bytes += memory->usage(angle_type,nmax,atom->angle_per_atom);
+  if (atom->memcheck("angle_atom1"))
+    bytes += memory->usage(angle_atom1,nmax,atom->angle_per_atom);
+  if (atom->memcheck("angle_atom2"))
+    bytes += memory->usage(angle_atom2,nmax,atom->angle_per_atom);
+  if (atom->memcheck("angle_atom3"))
+    bytes += memory->usage(angle_atom3,nmax,atom->angle_per_atom);
+
+  if (atom->memcheck("num_dihedral")) bytes += memory->usage(num_dihedral,nmax);
+  if (atom->memcheck("dihedral_type"))
+    bytes += memory->usage(dihedral_type,nmax,atom->dihedral_per_atom);
+  if (atom->memcheck("dihedral_atom1"))
+    bytes += memory->usage(dihedral_atom1,nmax,atom->dihedral_per_atom);
+  if (atom->memcheck("dihedral_atom2"))
+    bytes += memory->usage(dihedral_atom2,nmax,atom->dihedral_per_atom);
+  if (atom->memcheck("dihedral_atom3"))
+    bytes += memory->usage(dihedral_atom3,nmax,atom->dihedral_per_atom);
+  if (atom->memcheck("dihedral_atom4"))
+    bytes += memory->usage(dihedral_atom4,nmax,atom->dihedral_per_atom);
+  if (atom->memcheck("num_improper")) bytes += memory->usage(num_improper,nmax);
+  if (atom->memcheck("improper_type"))
+    bytes += memory->usage(improper_type,nmax,atom->improper_per_atom);
+  if (atom->memcheck("improper_atom1"))
+    bytes += memory->usage(improper_atom1,nmax,atom->improper_per_atom);
+  if (atom->memcheck("improper_atom2"))
+    bytes += memory->usage(improper_atom2,nmax,atom->improper_per_atom);
+  if (atom->memcheck("improper_atom3"))
+    bytes += memory->usage(improper_atom3,nmax,atom->improper_per_atom);
+  if (atom->memcheck("improper_atom4"))
+    bytes += memory->usage(improper_atom4,nmax,atom->improper_per_atom);
+
+  return bytes;
+}
+
+/* sync the DualViews whose flag bit is set in mask to the given execution space */
+
+void AtomVecMolecularKokkos::sync(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.sync<LMPDeviceType>();  // bitwise flag test, not logical AND
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.sync<LMPDeviceType>();
+      atomKK->k_special.sync<LMPDeviceType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.sync<LMPDeviceType>();
+      atomKK->k_bond_type.sync<LMPDeviceType>();
+      atomKK->k_bond_atom.sync<LMPDeviceType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.sync<LMPDeviceType>();
+      atomKK->k_angle_type.sync<LMPDeviceType>();
+      atomKK->k_angle_atom1.sync<LMPDeviceType>();
+      atomKK->k_angle_atom2.sync<LMPDeviceType>();
+      atomKK->k_angle_atom3.sync<LMPDeviceType>();
+    }
+    if (mask & DIHEDRAL_MASK) {
+      atomKK->k_num_dihedral.sync<LMPDeviceType>();
+      atomKK->k_dihedral_type.sync<LMPDeviceType>();
+      atomKK->k_dihedral_atom1.sync<LMPDeviceType>();
+      atomKK->k_dihedral_atom2.sync<LMPDeviceType>();
+      atomKK->k_dihedral_atom3.sync<LMPDeviceType>();
+      atomKK->k_dihedral_atom4.sync<LMPDeviceType>();
+    }
+    if (mask & IMPROPER_MASK) {
+      atomKK->k_num_improper.sync<LMPDeviceType>();
+      atomKK->k_improper_type.sync<LMPDeviceType>();
+      atomKK->k_improper_atom1.sync<LMPDeviceType>();
+      atomKK->k_improper_atom2.sync<LMPDeviceType>();
+      atomKK->k_improper_atom3.sync<LMPDeviceType>();
+      atomKK->k_improper_atom4.sync<LMPDeviceType>();  // 4th improper atom array
+    }
+  } else {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.sync<LMPHostType>();  // bitwise flag test, not logical AND
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.sync<LMPHostType>();
+      atomKK->k_special.sync<LMPHostType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.sync<LMPHostType>();
+      atomKK->k_bond_type.sync<LMPHostType>();
+      atomKK->k_bond_atom.sync<LMPHostType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.sync<LMPHostType>();
+      atomKK->k_angle_type.sync<LMPHostType>();
+      atomKK->k_angle_atom1.sync<LMPHostType>();
+      atomKK->k_angle_atom2.sync<LMPHostType>();
+      atomKK->k_angle_atom3.sync<LMPHostType>();
+    }
+    if (mask & DIHEDRAL_MASK) {
+      atomKK->k_num_dihedral.sync<LMPHostType>();
+      atomKK->k_dihedral_type.sync<LMPHostType>();
+      atomKK->k_dihedral_atom1.sync<LMPHostType>();
+      atomKK->k_dihedral_atom2.sync<LMPHostType>();
+      atomKK->k_dihedral_atom3.sync<LMPHostType>();
+      atomKK->k_dihedral_atom4.sync<LMPHostType>();
+    }
+    if (mask & IMPROPER_MASK) {
+      atomKK->k_num_improper.sync<LMPHostType>();
+      atomKK->k_improper_type.sync<LMPHostType>();
+      atomKK->k_improper_atom1.sync<LMPHostType>();
+      atomKK->k_improper_atom2.sync<LMPHostType>();
+      atomKK->k_improper_atom3.sync<LMPHostType>();
+      atomKK->k_improper_atom4.sync<LMPHostType>();  // 4th improper atom array
+    }
+  }
+}
+
+/* flag the DualViews whose flag bit is set in mask as modified on the given space */
+
+void AtomVecMolecularKokkos::modified(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.modify<LMPDeviceType>();  // bitwise flag test, not logical AND
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.modify<LMPDeviceType>();
+      atomKK->k_special.modify<LMPDeviceType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.modify<LMPDeviceType>();
+      atomKK->k_bond_type.modify<LMPDeviceType>();
+      atomKK->k_bond_atom.modify<LMPDeviceType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.modify<LMPDeviceType>();
+      atomKK->k_angle_type.modify<LMPDeviceType>();
+      atomKK->k_angle_atom1.modify<LMPDeviceType>();
+      atomKK->k_angle_atom2.modify<LMPDeviceType>();
+      atomKK->k_angle_atom3.modify<LMPDeviceType>();
+    }
+    if (mask & DIHEDRAL_MASK) {
+      atomKK->k_num_dihedral.modify<LMPDeviceType>();
+      atomKK->k_dihedral_type.modify<LMPDeviceType>();
+      atomKK->k_dihedral_atom1.modify<LMPDeviceType>();
+      atomKK->k_dihedral_atom2.modify<LMPDeviceType>();
+      atomKK->k_dihedral_atom3.modify<LMPDeviceType>();
+      atomKK->k_dihedral_atom4.modify<LMPDeviceType>();
+    }
+    if (mask & IMPROPER_MASK) {
+      atomKK->k_num_improper.modify<LMPDeviceType>();
+      atomKK->k_improper_type.modify<LMPDeviceType>();
+      atomKK->k_improper_atom1.modify<LMPDeviceType>();
+      atomKK->k_improper_atom2.modify<LMPDeviceType>();
+      atomKK->k_improper_atom3.modify<LMPDeviceType>();
+      atomKK->k_improper_atom4.modify<LMPDeviceType>();  // 4th improper atom array
+    }
+  } else {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.modify<LMPHostType>();  // bitwise flag test, not logical AND
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.modify<LMPHostType>();
+      atomKK->k_special.modify<LMPHostType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.modify<LMPHostType>();
+      atomKK->k_bond_type.modify<LMPHostType>();
+      atomKK->k_bond_atom.modify<LMPHostType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.modify<LMPHostType>();
+      atomKK->k_angle_type.modify<LMPHostType>();
+      atomKK->k_angle_atom1.modify<LMPHostType>();
+      atomKK->k_angle_atom2.modify<LMPHostType>();
+      atomKK->k_angle_atom3.modify<LMPHostType>();
+    }
+    if (mask & DIHEDRAL_MASK) {
+      atomKK->k_num_dihedral.modify<LMPHostType>();
+      atomKK->k_dihedral_type.modify<LMPHostType>();
+      atomKK->k_dihedral_atom1.modify<LMPHostType>();
+      atomKK->k_dihedral_atom2.modify<LMPHostType>();
+      atomKK->k_dihedral_atom3.modify<LMPHostType>();
+      atomKK->k_dihedral_atom4.modify<LMPHostType>();
+    }
+    if (mask & IMPROPER_MASK) {
+      atomKK->k_num_improper.modify<LMPHostType>();
+      atomKK->k_improper_type.modify<LMPHostType>();
+      atomKK->k_improper_atom1.modify<LMPHostType>();
+      atomKK->k_improper_atom2.modify<LMPHostType>();
+      atomKK->k_improper_atom3.modify<LMPHostType>();
+      atomKK->k_improper_atom4.modify<LMPHostType>();  // 4th improper atom array
+    }
+  }
+}
diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.h b/src/KOKKOS/atom_vec_molecular_kokkos.h
new file mode 100644
index 0000000000..24093beefd
--- /dev/null
+++ b/src/KOKKOS/atom_vec_molecular_kokkos.h
@@ -0,0 +1,178 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef ATOM_CLASS
+
+AtomStyle(molecular/kk,AtomVecMolecularKokkos)
+
+#else
+
+#ifndef LMP_ATOM_VEC_MOLECULAR_KOKKOS_H
+#define LMP_ATOM_VEC_MOLECULAR_KOKKOS_H
+
+#include "atom_vec_kokkos.h"
+
+namespace LAMMPS_NS {
+
+class AtomVecMolecularKokkos : public AtomVecKokkos {
+ public:
+  AtomVecMolecularKokkos(class LAMMPS *);
+  virtual ~AtomVecMolecularKokkos() {}
+  void grow(int);
+  void copy(int, int, int);
+  int pack_comm(int, int *, double *, int, int *);
+  int pack_comm_vel(int, int *, double *, int, int *);
+  void unpack_comm(int, int, double *);
+  void unpack_comm_vel(int, int, double *);
+  int pack_reverse(int, int, double *);
+  void unpack_reverse(int, int *, double *);
+  int pack_border(int, int *, double *, int, int *);
+  int pack_border_vel(int, int *, double *, int, int *);
+  int pack_border_hybrid(int, int *, double *);
+  void unpack_border(int, int, double *);
+  void unpack_border_vel(int, int, double *);
+  int unpack_border_hybrid(int, int, double *);
+  int pack_exchange(int, double *);
+  int unpack_exchange(double *);
+  int size_restart();
+  int pack_restart(int, double *);
+  int unpack_restart(double *);
+  void create_atom(int, double *);
+  void data_atom(double *, imageint, char **);  // 2nd arg is the image flag, matching the definition
+  int data_atom_hybrid(int, char **);
+  void pack_data(double **);
+  int pack_data_hybrid(int, double *);
+  void write_data(FILE *, int, double **);
+  int write_data_hybrid(FILE *, double *);
+  bigint memory_usage();
+
+  void grow_reset();
+  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
+                       const int & iswap,
+                       const DAT::tdual_xfloat_2d &buf,
+                       const int &pbc_flag, const int pbc[]);
+  void unpack_comm_kokkos(const int &n, const int &nfirst,
+                          const DAT::tdual_xfloat_2d &buf);
+  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
+                     const int & iswap, const int nfirst,
+                     const int &pbc_flag, const int pbc[]);
+  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,int iswap,
+                         int pbc_flag, int *pbc, ExecutionSpace space);
+  void unpack_border_kokkos(const int &n, const int &nfirst,
+                            const DAT::tdual_xfloat_2d &buf,
+                            ExecutionSpace space);
+  int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
+                           DAT::tdual_int_1d k_sendlist,
+                           DAT::tdual_int_1d k_copylist,
+                           ExecutionSpace space, int dim,
+                           X_FLOAT lo, X_FLOAT hi);
+  int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
+                             int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
+                             ExecutionSpace space);
+
+  void sync(ExecutionSpace space, unsigned int mask);      // mask: OR of *_MASK flag bits
+  void modified(ExecutionSpace space, unsigned int mask);  // mask: OR of *_MASK flag bits
+
+ protected:
+
+  tagint *tag;
+  int *type,*mask;
+  imageint *image;
+  double **x,**v,**f;
+
+  tagint *molecule;
+  int **nspecial;
+  tagint **special;
+  int *num_bond;
+  int **bond_type;
+  tagint **bond_atom;
+
+  int *num_angle;
+  int **angle_type;
+  tagint **angle_atom1,**angle_atom2,**angle_atom3;
+
+  int *num_dihedral;
+  int **dihedral_type;
+  tagint **dihedral_atom1,**dihedral_atom2,**dihedral_atom3,**dihedral_atom4;
+  int *num_improper;
+  int **improper_type;
+  tagint **improper_atom1,**improper_atom2,**improper_atom3,**improper_atom4;
+
+  DAT::t_tagint_1d d_tag;
+  DAT::t_int_1d d_type, d_mask;
+  HAT::t_tagint_1d h_tag;
+  HAT::t_int_1d h_type, h_mask;
+
+  DAT::t_imageint_1d d_image;
+  HAT::t_imageint_1d h_image;
+
+  DAT::t_x_array d_x;
+  DAT::t_v_array d_v;
+  DAT::t_f_array d_f;
+  HAT::t_x_array h_x;
+  HAT::t_v_array h_v;
+  HAT::t_f_array h_f;
+
+  DAT::t_tagint_1d d_molecule;
+  DAT::t_int_2d d_nspecial;
+  DAT::t_tagint_2d d_special;
+  DAT::t_int_1d d_num_bond;
+  DAT::t_int_2d d_bond_type;
+  DAT::t_tagint_2d d_bond_atom;
+
+  HAT::t_tagint_1d h_molecule;
+  HAT::t_int_2d h_nspecial;
+  HAT::t_tagint_2d h_special;
+  HAT::t_int_1d h_num_bond;
+  HAT::t_int_2d h_bond_type;
+  HAT::t_tagint_2d h_bond_atom;
+
+  DAT::t_int_1d d_num_angle;
+  DAT::t_int_2d d_angle_type;
+  DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3;
+
+  HAT::t_int_1d h_num_angle;
+  HAT::t_int_2d h_angle_type;
+  HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3;
+
+  DAT::t_int_1d d_num_dihedral;
+  DAT::t_int_2d d_dihedral_type;
+  DAT::t_tagint_2d d_dihedral_atom1,d_dihedral_atom2,
+    d_dihedral_atom3,d_dihedral_atom4;
+  DAT::t_int_1d d_num_improper;
+  DAT::t_int_2d d_improper_type;
+  DAT::t_tagint_2d d_improper_atom1,d_improper_atom2,
+    d_improper_atom3,d_improper_atom4;
+
+  HAT::t_int_1d h_num_dihedral;
+  HAT::t_int_2d h_dihedral_type;
+  HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2,
+    h_dihedral_atom3,h_dihedral_atom4;
+  HAT::t_int_1d h_num_improper;
+  HAT::t_int_2d h_improper_type;
+  HAT::t_tagint_2d h_improper_atom1,h_improper_atom2,
+    h_improper_atom3,h_improper_atom4;
+
+  HAT::tdual_int_1d k_count;  // atom counter handed to the unpack-exchange functor
+
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 5211d11a02..1319503c1e 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -21,6 +21,13 @@ #include "atom_masks.h" #include "error.h" #include "memory.h" +#include "force.h" +#include "pair.h" +#include "fix.h" +#include "compute.h" +#include "dump.h" +#include "output.h" +#include "modify.h" using namespace LAMMPS_NS; @@ -43,15 +50,19 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp) // initialize comm buffers & exchange memory - maxsend = BUFMIN; - k_buf_send = ArrayTypes<LMPDeviceType>:: - tdual_xfloat_2d("comm:k_buf_send",(maxsend+BUFEXTRA+5)/6,6); - buf_send = k_buf_send.view<LMPHostType>().ptr_on_device(); + // maxsend = BUFMIN; + // k_buf_send = ArrayTypes<LMPDeviceType>:: + // tdual_xfloat_2d("comm:k_buf_send",(maxsend+BUFEXTRA+5)/6,6); + // buf_send = k_buf_send.view<LMPHostType>().ptr_on_device(); + maxsend = 0; + buf_send = NULL; - maxrecv = BUFMIN; - k_buf_recv = ArrayTypes<LMPDeviceType>:: - tdual_xfloat_2d("comm:k_buf_recv",(maxrecv+5)/6,6); - buf_recv = 
k_buf_recv.view<LMPHostType>().ptr_on_device(); + // maxrecv = BUFMIN; + // k_buf_recv = ArrayTypes<LMPDeviceType>:: + // tdual_xfloat_2d("comm:k_buf_recv",(maxrecv+5)/6,6); + // buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device(); + maxrecv = 0; + buf_recv = NULL; k_exchange_sendlist = ArrayTypes<LMPDeviceType>:: tdual_int_1d("comm:k_exchange_sendlist",100); @@ -89,6 +100,34 @@ void CommKokkos::init() forward_comm_on_host = lmp->kokkos->forward_comm_on_host; CommBrick::init(); + + int check_forward = 0; + int check_reverse = 0; + if (force->pair) + check_forward += force->pair->comm_forward; + if (force->pair) + check_reverse += force->pair->comm_reverse; + + for (int i = 0; i < modify->nfix; i++) { + check_forward += modify->fix[i]->comm_forward; + check_reverse += modify->fix[i]->comm_reverse; + } + + for (int i = 0; i < modify->ncompute; i++) { + check_forward += modify->compute[i]->comm_forward; + check_reverse += modify->compute[i]->comm_reverse; + } + + for (int i = 0; i < output->ndump; i++) { + check_forward += output->dump[i]->comm_forward; + check_reverse += output->dump[i]->comm_reverse; + } + + if (force->newton == 0) check_reverse = 0; + if (force->pair) check_reverse += force->pair->comm_reverse_off; + + if(check_reverse || check_forward) + forward_comm_classic = true; } /* ---------------------------------------------------------------------- @@ -98,8 +137,7 @@ void CommKokkos::init() void CommKokkos::forward_comm(int dummy) { - - if (!forward_comm_classic) { + if (!forward_comm_classic) { if (forward_comm_on_host) forward_comm_device<LMPHostType>(dummy); else forward_comm_device<LMPDeviceType>(dummy); return; @@ -205,6 +243,68 @@ void CommKokkos::forward_comm_device(int dummy) } } } +void CommKokkos::reverse_comm() +{ + k_sendlist.sync<LMPHostType>(); + if (comm_f_only) + atomKK->sync(Host,F_MASK); + else + atomKK->sync(Host,ALL_MASK); + CommBrick::reverse_comm(); + if (comm_f_only) + atomKK->modified(Host,F_MASK); + else + 
atomKK->modified(Host,ALL_MASK); + atomKK->sync(Device,ALL_MASK); +} + +void CommKokkos::forward_comm_fix(Fix *fix) +{ + k_sendlist.sync<LMPHostType>(); + CommBrick::forward_comm_fix(fix); +} + +void CommKokkos::reverse_comm_fix(Fix *fix) +{ + k_sendlist.sync<LMPHostType>(); + CommBrick::reverse_comm_fix(fix); +} + +void CommKokkos::forward_comm_compute(Compute *compute) +{ + k_sendlist.sync<LMPHostType>(); + CommBrick::forward_comm_compute(compute); +} + +void CommKokkos::reverse_comm_compute(Compute *compute) +{ + k_sendlist.sync<LMPHostType>(); + CommBrick::reverse_comm_compute(compute); +} + +void CommKokkos::forward_comm_pair(Pair *pair) +{ + k_sendlist.sync<LMPHostType>(); + CommBrick::forward_comm_pair(pair); +} + +void CommKokkos::reverse_comm_pair(Pair *pair) +{ + k_sendlist.sync<LMPHostType>(); + CommBrick::reverse_comm_pair(pair); +} + +void CommKokkos::forward_comm_dump(Dump *dump) +{ + k_sendlist.sync<LMPHostType>(); + CommBrick::forward_comm_dump(dump); +} + +void CommKokkos::reverse_comm_dump(Dump *dump) +{ + k_sendlist.sync<LMPHostType>(); + CommBrick::reverse_comm_dump(dump); +} /* ---------------------------------------------------------------------- exchange: move atoms to correct processors @@ -219,6 +319,16 @@ void CommKokkos::forward_comm_device(int dummy) void CommKokkos::exchange() { + if(atom->nextra_grow + atom->nextra_border) { + if(!exchange_comm_classic) { + static int print = 1; + if(print) { + error->warning(FLERR,"Kokkos communication does not currently support fixes sending data. 
Switching to classic communication."); + print = 0; + } + exchange_comm_classic = true; + } + } if (!exchange_comm_classic) { if (exchange_comm_on_host) exchange_device<LMPHostType>(); else exchange_device<LMPDeviceType>(); @@ -463,10 +573,12 @@ void CommKokkos::borders() } atomKK->sync(Host,ALL_MASK); + k_sendlist.modify<LMPHostType>(); atomKK->modified(Host,ALL_MASK); - CommBrick::borders(); + k_sendlist.modify<LMPHostType>(); + atomKK->modified(Host,ALL_MASK); } /* ---------------------------------------------------------------------- */ @@ -496,7 +608,7 @@ struct BuildBorderListFunctor { KOKKOS_INLINE_FUNCTION - void operator() (DeviceType dev) const { + void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const { const int chunk = ((nlast - nfirst + dev.league_size() - 1 ) / dev.league_size()); const int teamstart = chunk*dev.league_rank() + nfirst; @@ -517,7 +629,7 @@ struct BuildBorderListFunctor { } } - size_t shmem_size() const { return 1000u;} + size_t shmem_size(const int team_size) const { (void) team_size; return 1000u;} }; /* ---------------------------------------------------------------------- */ @@ -591,16 +703,19 @@ void CommKokkos::borders_device() { total_send.template modify<DeviceType>(); total_send.template sync<LMPDeviceType>(); } + BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist, total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); - Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128); + Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128); Kokkos::parallel_for(config,f); DeviceType::fence(); + total_send.template modify<DeviceType>(); total_send.template sync<LMPHostType>(); if(total_send.h_view(0) >= maxsendlist[iswap]) { grow_list(iswap,total_send.h_view(0)); + k_sendlist.modify<DeviceType>(); total_send.h_view(0) = 0; if(exec_space == Device) { total_send.template modify<LMPHostType>(); @@ -608,7 +723,7 @@ void CommKokkos::borders_device() { } 
BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist, total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); - Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128); + Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128); Kokkos::parallel_for(config,f); DeviceType::fence(); total_send.template modify<DeviceType>(); @@ -742,6 +857,25 @@ void CommKokkos::borders_device() { atomKK->modified(exec_space,ALL_MASK); DeviceType::fence(); } +/* ---------------------------------------------------------------------- + realloc the size of the send buffer as needed with BUFFACTOR and bufextra + if flag = 1, realloc + if flag = 0, don't need to realloc with copy, just free/malloc +------------------------------------------------------------------------- */ + +void CommKokkos::grow_send(int n, int flag) +{ + grow_send_kokkos(n,flag,Host); +} + +/* ---------------------------------------------------------------------- + free/malloc the size of the recv buffer as needed with BUFFACTOR +------------------------------------------------------------------------- */ + +void CommKokkos::grow_recv(int n) +{ + grow_recv_kokkos(n,Host); +} /* ---------------------------------------------------------------------- realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h index 46d3552d2d..539156af09 100644 --- a/src/KOKKOS/comm_kokkos.h +++ b/src/KOKKOS/comm_kokkos.h @@ -33,9 +33,19 @@ class CommKokkos : public CommBrick { void init(); void forward_comm(int dummy = 0); // forward comm of atom coords + void reverse_comm(); // reverse comm of atom coords void exchange(); // move atoms to new procs void borders(); // setup list of atoms to comm + void forward_comm_pair(class Pair *); // forward comm from a Pair + void reverse_comm_pair(class Pair *); // reverse comm from a Pair + void forward_comm_fix(class Fix *); // forward comm from a Fix + void reverse_comm_fix(class Fix *); // 
reverse comm from a Fix + void forward_comm_compute(class Compute *); // forward from a Compute + void reverse_comm_compute(class Compute *); // reverse from a Compute + void forward_comm_dump(class Dump *); // forward comm from a Dump + void reverse_comm_dump(class Dump *); // reverse comm from a Dump + template<class DeviceType> void forward_comm_device(int dummy); template<class DeviceType> void exchange_device(); template<class DeviceType> void borders_device(); @@ -48,6 +58,8 @@ class CommKokkos : public CommBrick { //double *buf_send; // send buffer for all comm //double *buf_recv; // recv buffer for all comm + void grow_send(int, int); + void grow_recv(int); void grow_send_kokkos(int, int, ExecutionSpace space = Host); void grow_recv_kokkos(int, ExecutionSpace space = Host); void grow_list(int, int); diff --git a/src/KOKKOS/fix_langevin_kokkos.cpp b/src/KOKKOS/fix_langevin_kokkos.cpp new file mode 100644 index 0000000000..cd34d3a964 --- /dev/null +++ b/src/KOKKOS/fix_langevin_kokkos.cpp @@ -0,0 +1,810 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+ ------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "string.h" +#include "fix_langevin_kokkos.h" +#include "atom_masks.h" +#include "atom_kokkos.h" +#include "force.h" +#include "update.h" +#include "respa.h" +#include "error.h" +#include "memory.h" +#include "group.h" +#include "random_mars.h" +#include "compute.h" +#include "comm.h" +#include "modify.h" +#include "input.h" +#include "variable.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +enum{NOBIAS,BIAS}; +enum{CONSTANT,EQUAL,ATOM}; +#define SINERTIA 0.4 // moment of inertia prefactor for sphere +#define EINERTIA 0.2 // moment of inertia prefactor for ellipsoid + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **arg) : + FixLangevin(lmp, narg, arg),rand_pool(seed + comm->me) +{ + atomKK = (AtomKokkos *) atom; + int ntypes = atomKK->ntypes; + + // allocate per-type arrays for force prefactors + memory->create_kokkos(k_gfactor1,gfactor1,ntypes+1,"langevin:gfactor1"); + memory->create_kokkos(k_gfactor2,gfactor2,ntypes+1,"langevin:gfactor2"); + memory->create_kokkos(k_ratio,ratio,ntypes+1,"langevin:ratio"); + d_gfactor1 = k_gfactor1.template view<DeviceType>(); + h_gfactor1 = k_gfactor1.template view<LMPHostType>(); + d_gfactor2 = k_gfactor2.template view<DeviceType>(); + h_gfactor2 = k_gfactor2.template view<LMPHostType>(); + d_ratio = k_ratio.template view<DeviceType>(); + h_ratio = k_ratio.template view<LMPHostType>(); + + // optional args + for (int i = 1; i <= ntypes; i++) ratio[i] = 1.0; + k_ratio.template modify<LMPHostType>(); + + if(gjfflag){ + nvalues = 3; + grow_arrays(atomKK->nmax); + atom->add_callback(0); + // initialize franprev to zero + for (int i = 0; i < atomKK->nlocal; i++) { + franprev[i][0] = 0.0; + franprev[i][1] = 0.0; + franprev[i][2] = 0.0; + } + 
k_franprev.template modify<LMPHostType>(); + } + if(zeroflag){ + k_fsumall = tdual_double_1d_3n("langevin:fsumall"); + h_fsumall = k_fsumall.template view<LMPHostType>(); + d_fsumall = k_fsumall.template view<DeviceType>(); + } + + execution_space = ExecutionSpaceFromDevice<DeviceType>::space; + datamask_read = V_MASK | F_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK; + datamask_modify = F_MASK; + +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +FixLangevinKokkos<DeviceType>::~FixLangevinKokkos() +{ + memory->destroy_kokkos(k_gfactor1,gfactor1); + memory->destroy_kokkos(k_gfactor2,gfactor2); + memory->destroy_kokkos(k_ratio,ratio); + memory->destroy_kokkos(k_flangevin,flangevin); + if(gjfflag) memory->destroy_kokkos(k_franprev,franprev); + memory->destroy_kokkos(k_tforce,tforce); +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void FixLangevinKokkos<DeviceType>::init() +{ + FixLangevin::init(); + if(oflag) + error->all(FLERR,"Fix langevin omega is not yet implemented with kokkos"); + if(ascale) + error->all(FLERR,"Fix langevin angmom is not yet implemented with kokkos"); + + // prefactors are modified in the init + k_gfactor1.template modify<LMPHostType>(); + k_gfactor2.template modify<LMPHostType>(); +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void FixLangevinKokkos<DeviceType>::grow_arrays(int nmax) +{ + memory->grow_kokkos(k_franprev,franprev,nmax,3,"langevin:franprev"); + d_franprev = k_franprev.template view<DeviceType>(); + h_franprev = k_franprev.template view<LMPHostType>(); +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void FixLangevinKokkos<DeviceType>::post_force(int vflag) +{ + // sync the device views which might have been modified on host + atomKK->sync(execution_space,datamask_read); + 
rmass = atomKK->rmass; + f = atomKK->k_f.template view<DeviceType>(); + v = atomKK->k_v.template view<DeviceType>(); + type = atomKK->k_type.template view<DeviceType>(); + mask = atomKK->k_mask.template view<DeviceType>(); + + k_gfactor1.template sync<DeviceType>(); + k_gfactor2.template sync<DeviceType>(); + k_ratio.template sync<DeviceType>(); + if(gjfflag) k_franprev.template sync<DeviceType>(); + + boltz = force->boltz; + dt = update->dt; + mvv2e = force->mvv2e; + ftm2v = force->ftm2v; + fran_prop_const = sqrt(24.0*boltz/t_period/dt/mvv2e); + + compute_target(); // modifies tforce vector, hence sync here + k_tforce.template sync<DeviceType>(); + + double fsum[3],fsumall[3]; + bigint count; + int nlocal = atomKK->nlocal; + + if (zeroflag) { + fsum[0] = fsum[1] = fsum[2] = 0.0; + count = group->count(igroup); + if (count == 0) + error->all(FLERR,"Cannot zero Langevin force of 0 atoms"); + } + + // reallocate flangevin if necessary + if (tallyflag) { + if (nlocal > maxatom1) { + memory->destroy_kokkos(k_flangevin,flangevin); + maxatom1 = atomKK->nmax; + memory->create_kokkos(k_flangevin,flangevin,maxatom1,3,"langevin:flangevin"); + d_flangevin = k_flangevin.template view<DeviceType>(); + h_flangevin = k_flangevin.template view<LMPHostType>(); + } + } + + // account for bias velocity + if(tbiasflag == BIAS){ + temperature->compute_scalar(); + temperature->remove_bias_all(); // modifies velocities + // if the temperature compute is kokkosized, this host-device comm won't be needed + atomKK->modified(Host,V_MASK); + atomKK->sync(execution_space,V_MASK); + } + + // compute langevin force in parallel on the device + FSUM s_fsum; + if (tstyle == ATOM) + if (gjfflag) + if (tallyflag) + if (tbiasflag == BIAS) + if (rmass) + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,1,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else{ + FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,1,0> post_functor(this); +
Kokkos::parallel_for(nlocal,post_functor); + } + else + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,0,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,0,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (rmass) + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,1,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,1,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,0,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else{ + FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,0,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (tbiasflag == BIAS) + if (rmass) + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,1,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,1,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,0,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,0,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (rmass) + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,1,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,1,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (zeroflag) { + 
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,0,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,0,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (tallyflag) + if (tbiasflag == BIAS) + if (rmass) + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,1,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,1,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,0,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,0,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (rmass) + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,1,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,1,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,0,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,0,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (tbiasflag == BIAS) + if (rmass) + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,1,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,1,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,0,1> post_functor(this); + 
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,0,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (rmass) + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,1,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,1,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,0,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,0,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (gjfflag) + if (tallyflag) + if (tbiasflag == BIAS) + if (rmass) + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,1,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,1,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,0,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,0,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (rmass) + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,1,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,1,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,0,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + 
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,0,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (tbiasflag == BIAS) + if (rmass) + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,1,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,1,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,0,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,0,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (rmass) + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,1,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,1,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,0,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,0,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (tallyflag) + if (tbiasflag == BIAS) + if (rmass) + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,1,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,1,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,0,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,0,0> post_functor(this); + 
Kokkos::parallel_for(nlocal,post_functor); + } + else + if (rmass) + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,1,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,1,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,0,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,0,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (tbiasflag == BIAS) + if (rmass) + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,1,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,1,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,0,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,0,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (rmass) + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,1,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,1,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + else + if (zeroflag) { + FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,0,1> post_functor(this); + Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); + } + else { + FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,0,0> post_functor(this); + Kokkos::parallel_for(nlocal,post_functor); + } + + DeviceType::fence(); + + if(tbiasflag == BIAS){ + 
temperature->restore_bias_all(); // modifies velocities + atomKK->modified(Host,V_MASK); + } + + // set modify flags for the views modified in post_force functor + if (gjfflag) k_franprev.template modify<DeviceType>(); + if (tallyflag) k_flangevin.template modify<DeviceType>(); + + // set total force to zero + if (zeroflag) { + fsum[0] = s_fsum.fx; fsum[1] = s_fsum.fy; fsum[2] = s_fsum.fz; + MPI_Allreduce(fsum,fsumall,3,MPI_DOUBLE,MPI_SUM,world); + h_fsumall(0) = fsumall[0]/count; + h_fsumall(1) = fsumall[1]/count; + h_fsumall(2) = fsumall[2]/count; + k_fsumall.template modify<LMPHostType>(); + k_fsumall.template sync<DeviceType>(); + // set total force zero in parallel on the device + FixLangevinKokkosZeroForceFunctor<DeviceType> zero_functor(this); + Kokkos::parallel_for(nlocal,zero_functor); + DeviceType::fence(); + } + // f is modified by both post_force and zero_force functors + atomKK->modified(execution_space,datamask_modify); + + // thermostat omega and angmom + // if (oflag) omega_thermostat(); + // if (ascale) angmom_thermostat(); + +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +template<int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY, + int Tp_BIAS, int Tp_RMASS, int Tp_ZERO> +KOKKOS_INLINE_FUNCTION +FSUM FixLangevinKokkos<DeviceType>::post_force_item(int i) const +{ + FSUM fsum; + double fdrag[3],fran[3]; + double gamma1,gamma2; + double fswap; + double tsqrt_t = tsqrt; + + if (mask[i] & groupbit) { + rand_type rand_gen = rand_pool.get_state(); + if(Tp_TSTYLEATOM) tsqrt_t = sqrt(d_tforce[i]); + if(Tp_RMASS){ + gamma1 = -rmass[i] / t_period / ftm2v; + gamma2 = sqrt(rmass[i]) * fran_prop_const / ftm2v; + gamma1 *= 1.0/d_ratio[type[i]]; + gamma2 *= 1.0/sqrt(d_ratio[type[i]]) * tsqrt_t; + } else { + gamma1 = d_gfactor1[type[i]]; + gamma2 = d_gfactor2[type[i]] * tsqrt_t; + } + + fran[0] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5); + fran[1] = gamma2 * (rand_gen.drand() - 0.5); 
//(random->uniform()-0.5); + fran[2] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5); + + if(Tp_BIAS){ + fdrag[0] = gamma1*v(i,0); + fdrag[1] = gamma1*v(i,1); + fdrag[2] = gamma1*v(i,2); + if (v(i,0) == 0.0) fran[0] = 0.0; + if (v(i,1) == 0.0) fran[1] = 0.0; + if (v(i,2) == 0.0) fran[2] = 0.0; + }else{ + fdrag[0] = gamma1*v(i,0); + fdrag[1] = gamma1*v(i,1); + fdrag[2] = gamma1*v(i,2); + } + + if (Tp_GJF) { + fswap = 0.5*(fran[0]+d_franprev(i,0)); + d_franprev(i,0) = fran[0]; + fran[0] = fswap; + fswap = 0.5*(fran[1]+d_franprev(i,1)); + d_franprev(i,1) = fran[1]; + fran[1] = fswap; + fswap = 0.5*(fran[2]+d_franprev(i,2)); + d_franprev(i,2) = fran[2]; + fran[2] = fswap; + + fdrag[0] *= gjffac; + fdrag[1] *= gjffac; + fdrag[2] *= gjffac; + fran[0] *= gjffac; + fran[1] *= gjffac; + fran[2] *= gjffac; + f(i,0) *= gjffac; + f(i,1) *= gjffac; + f(i,2) *= gjffac; + } + + f(i,0) += fdrag[0] + fran[0]; + f(i,1) += fdrag[1] + fran[1]; + f(i,2) += fdrag[2] + fran[2]; + + if (Tp_TALLY) { + d_flangevin(i,0) = fdrag[0] + fran[0]; + d_flangevin(i,1) = fdrag[1] + fran[1]; + d_flangevin(i,2) = fdrag[2] + fran[2]; + } + + if (Tp_ZERO) { + fsum.fx = fran[0]; + fsum.fy = fran[1]; + fsum.fz = fran[2]; + } + rand_pool.free_state(rand_gen); + } + + return fsum; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +KOKKOS_INLINE_FUNCTION +void FixLangevinKokkos<DeviceType>::zero_force_item(int i) const +{ + if (mask[i] & groupbit) { + f(i,0) -= d_fsumall[0]; + f(i,1) -= d_fsumall[1]; + f(i,2) -= d_fsumall[2]; + } + +} + +/* ---------------------------------------------------------------------- + set current t_target and t_sqrt + ------------------------------------------------------------------------- */ + +template<class DeviceType> +void FixLangevinKokkos<DeviceType>::compute_target() +{ + atomKK->sync(Host, MASK_MASK); + mask = atomKK->k_mask.template view<DeviceType>(); + int nlocal = atomKK->nlocal; + + double 
delta = update->ntimestep - update->beginstep; + if (delta != 0.0) delta /= update->endstep - update->beginstep; + + // if variable temp, evaluate variable, wrap with clear/add + // reallocate tforce array if necessary + + if (tstyle == CONSTANT) { + t_target = t_start + delta * (t_stop-t_start); + tsqrt = sqrt(t_target); + } else { + modify->clearstep_compute(); + if (tstyle == EQUAL) { + t_target = input->variable->compute_equal(tvar); + if (t_target < 0.0) + error->one(FLERR,"Fix langevin variable returned negative temperature"); + tsqrt = sqrt(t_target); + } else { + if (nlocal > maxatom2) { + maxatom2 = atom->nmax; + memory->destroy_kokkos(k_tforce,tforce); + memory->create_kokkos(k_tforce,tforce,maxatom2,"langevin:tforce"); + d_tforce = k_tforce.template view<DeviceType>(); + h_tforce = k_tforce.template view<LMPHostType>(); + } + input->variable->compute_atom(tvar,igroup,tforce,1,0); // tforce is modified on host + k_tforce.template modify<LMPHostType>(); + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit) + if (h_tforce[i] < 0.0) + error->one(FLERR, + "Fix langevin variable returned negative temperature"); + } + modify->addstep_compute(update->ntimestep + 1); + } +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void FixLangevinKokkos<DeviceType>::reset_dt() +{ + if (atomKK->mass) { + for (int i = 1; i <= atomKK->ntypes; i++) { + h_gfactor2[i] = sqrt(atomKK->mass[i]) * + sqrt(24.0*force->boltz/t_period/update->dt/force->mvv2e) / + force->ftm2v; + h_gfactor2[i] *= 1.0/sqrt(h_ratio[i]); + } + k_gfactor2.template modify<LMPHostType>(); + } + +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +double FixLangevinKokkos<DeviceType>::compute_scalar() +{ + if (!tallyflag || flangevin == NULL) return 0.0; + + v = atomKK->k_v.template view<DeviceType>(); + mask = atomKK->k_mask.template view<DeviceType>(); + + // capture the very 
first energy transfer to thermal reservoir + + if (update->ntimestep == update->beginstep) { + energy_onestep = 0.0; + atomKK->sync(execution_space,V_MASK | MASK_MASK); + int nlocal = atomKK->nlocal; + k_flangevin.template sync<DeviceType>(); + FixLangevinKokkosTallyEnergyFunctor<DeviceType> scalar_functor(this); + Kokkos::parallel_reduce(nlocal,scalar_functor,energy_onestep); + DeviceType::fence(); + energy = 0.5*energy_onestep*update->dt; + } + + // convert midstep energy back to previous fullstep energy + double energy_me = energy - 0.5*energy_onestep*update->dt; + double energy_all; + MPI_Allreduce(&energy_me,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + return -energy_all; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +KOKKOS_INLINE_FUNCTION +double FixLangevinKokkos<DeviceType>::compute_energy_item(int i) const +{ + double energy = 0.0; // atoms outside the fix group must contribute exactly zero to the tally + if (mask[i] & groupbit) + energy = d_flangevin(i,0)*v(i,0) + d_flangevin(i,1)*v(i,1) + + d_flangevin(i,2)*v(i,2); + return energy; +} + +/* ---------------------------------------------------------------------- + tally energy transfer to thermal reservoir + ------------------------------------------------------------------------- */ + +template<class DeviceType> +void FixLangevinKokkos<DeviceType>::end_of_step() +{ + if (!tallyflag) return; + + v = atomKK->k_v.template view<DeviceType>(); + mask = atomKK->k_mask.template view<DeviceType>(); + + atomKK->sync(execution_space,V_MASK | MASK_MASK); + int nlocal = atomKK->nlocal; + + energy_onestep = 0.0; + + k_flangevin.template sync<DeviceType>(); + FixLangevinKokkosTallyEnergyFunctor<DeviceType> tally_functor(this); + Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep); + DeviceType::fence(); + + energy += energy_onestep*update->dt; +} + +/* ---------------------------------------------------------------------- + copy values within local atom-based array +
------------------------------------------------------------------------- */ + +template<class DeviceType> +void FixLangevinKokkos<DeviceType>::copy_arrays(int i, int j, int delflag) +{ + for (int m = 0; m < nvalues; m++) + h_franprev(j,m) = h_franprev(i,m); + + k_franprev.template modify<LMPHostType>(); + +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void FixLangevinKokkos<DeviceType>::cleanup_copy() +{ + random = NULL; + tstr = NULL; + gfactor1 = NULL; + gfactor2 = NULL; + ratio = NULL; + id_temp = NULL; + flangevin = NULL; + tforce = NULL; + gjfflag = 0; + franprev = NULL; + id = style = NULL; + vatom = NULL; +} + +template class FixLangevinKokkos<LMPDeviceType>; +#ifdef KOKKOS_HAVE_CUDA +template class FixLangevinKokkos<LMPHostType>; +#endif diff --git a/src/KOKKOS/fix_langevin_kokkos.h b/src/KOKKOS/fix_langevin_kokkos.h new file mode 100644 index 0000000000..48a7d5ec57 --- /dev/null +++ b/src/KOKKOS/fix_langevin_kokkos.h @@ -0,0 +1,266 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+ ------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(langevin/kk,FixLangevinKokkos<LMPDeviceType>) +FixStyle(langevin/kk/device,FixLangevinKokkos<LMPDeviceType>) +FixStyle(langevin/kk/host,FixLangevinKokkos<LMPHostType>) + +#else + +#ifndef LMP_FIX_LANGEVIN_KOKKOS_H +#define LMP_FIX_LANGEVIN_KOKKOS_H + +#include "fix_langevin.h" +#include "kokkos_type.h" +#include "Kokkos_Random.hpp" +#include "comm_kokkos.h" + +namespace LAMMPS_NS { + + struct s_FSUM { + double fx, fy, fz; + KOKKOS_INLINE_FUNCTION + s_FSUM() { + fx = fy = fz = 0.0; + } + KOKKOS_INLINE_FUNCTION + s_FSUM& operator+=(const s_FSUM &rhs){ + fx += rhs.fx; + fy += rhs.fy; + fz += rhs.fz; + return *this; + } + }; + typedef s_FSUM FSUM; + + template<class DeviceType> + class FixLangevinKokkos; + + template<class DeviceType,int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY, + int Tp_BIAS, int Tp_RMASS, int Tp_ZERO> + class FixLangevinKokkosPostForceFunctor; + + template<class DeviceType> class FixLangevinKokkosZeroForceFunctor; + + template<class DeviceType> class FixLangevinKokkosTallyEnergyFunctor; + + template<class DeviceType> + class FixLangevinKokkos : public FixLangevin { + public: + FixLangevinKokkos(class LAMMPS *, int, char **); + ~FixLangevinKokkos(); + + void cleanup_copy(); + void init(); + void post_force(int); + void reset_dt(); + void grow_arrays(int); + void copy_arrays(int i, int j, int delflag); + double compute_scalar(); + void end_of_step(); + + template<int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY, + int Tp_BIAS, int Tp_RMASS, int Tp_ZERO> + KOKKOS_INLINE_FUNCTION + FSUM post_force_item(int) const; + + KOKKOS_INLINE_FUNCTION + void zero_force_item(int) const; + + KOKKOS_INLINE_FUNCTION + double compute_energy_item(int) const; + + private: + class CommKokkos *commKK; + class AtomKokkos *atomKK; + double *rmass; + typename ArrayTypes<DeviceType>::tdual_double_2d k_franprev; + typename ArrayTypes<DeviceType>::t_double_2d d_franprev; + 
HAT::t_double_2d h_franprev; + + typename ArrayTypes<DeviceType>::tdual_double_2d k_flangevin; + typename ArrayTypes<DeviceType>::t_double_2d d_flangevin; + HAT::t_double_2d h_flangevin; + + typename ArrayTypes<DeviceType>::tdual_double_1d k_tforce; + typename ArrayTypes<DeviceType>::t_double_1d d_tforce; + HAT::t_double_1d h_tforce; + + typename ArrayTypes<DeviceType>::t_v_array v; + typename ArrayTypes<DeviceType>::t_f_array f; + typename ArrayTypes<DeviceType>::t_int_1d type; + typename ArrayTypes<DeviceType>::t_int_1d mask; + + typename ArrayTypes<DeviceType>::tdual_double_1d k_gfactor1, k_gfactor2, k_ratio; + typename ArrayTypes<DeviceType>::t_double_1d d_gfactor1, d_gfactor2, d_ratio; + HAT::t_double_1d h_gfactor1, h_gfactor2, h_ratio; + + typedef Kokkos::DualView<double[3], DeviceType> + tdual_double_1d_3n; + tdual_double_1d_3n k_fsumall; + typename tdual_double_1d_3n::t_dev d_fsumall; + typename tdual_double_1d_3n::t_host h_fsumall; + + double boltz,dt,mvv2e,ftm2v,fran_prop_const; + + void compute_target(); + + Kokkos::Random_XorShift64_Pool<DeviceType> rand_pool; + typedef typename Kokkos::Random_XorShift64_Pool<DeviceType>::generator_type rand_type; + + }; + + template <class DeviceType,int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY, + int Tp_BIAS, int Tp_RMASS, int Tp_ZERO> + struct FixLangevinKokkosPostForceFunctor { + + typedef DeviceType device_type; + typedef FSUM value_type; + FixLangevinKokkos<DeviceType> c; + + FixLangevinKokkosPostForceFunctor(FixLangevinKokkos<DeviceType>* c_ptr): + c(*c_ptr) {} + ~FixLangevinKokkosPostForceFunctor(){c.cleanup_copy();} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + c.template post_force_item<Tp_TSTYLEATOM,Tp_GJF, Tp_TALLY, + Tp_BIAS,Tp_RMASS,Tp_ZERO>(i); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const int i, value_type &fsum) const { + + fsum += c.template post_force_item<Tp_TSTYLEATOM,Tp_GJF, Tp_TALLY, + Tp_BIAS,Tp_RMASS,Tp_ZERO>(i); + } + + KOKKOS_INLINE_FUNCTION + static void 
init(volatile value_type &update) { + update.fx = 0.0; + update.fy = 0.0; + update.fz = 0.0; + } + KOKKOS_INLINE_FUNCTION + static void join(volatile value_type &update, + const volatile value_type &source) { + update.fx += source.fx; + update.fy += source.fy; + update.fz += source.fz; + } + + }; + + template <class DeviceType> + struct FixLangevinKokkosZeroForceFunctor { + typedef DeviceType device_type ; + FixLangevinKokkos<DeviceType> c; + + FixLangevinKokkosZeroForceFunctor(FixLangevinKokkos<DeviceType>* c_ptr): + c(*c_ptr) {c.cleanup_copy();} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + c.zero_force_item(i); + } + }; + + template<class DeviceType> + struct FixLangevinKokkosTallyEnergyFunctor { + typedef DeviceType device_type ; + FixLangevinKokkos<DeviceType> c; + typedef double value_type; + FixLangevinKokkosTallyEnergyFunctor(FixLangevinKokkos<DeviceType>* c_ptr): + c(*c_ptr) {c.cleanup_copy();} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i, value_type &energy) const { + energy += c.compute_energy_item(i); + } + KOKKOS_INLINE_FUNCTION + static void init(volatile value_type &update) { + update = 0.0; + } + KOKKOS_INLINE_FUNCTION + static void join(volatile value_type &update, + const volatile value_type &source) { + update += source; + } + }; +} + +#endif +#endif + +/* ERROR/WARNING messages: + + E: Illegal ... command + + Self-explanatory. Check the input script syntax and compare to the + documentation for the command. You can use -echo screen as a + command-line option when running LAMMPS to see the offending line. + + E: Fix langevin period must be > 0.0 + + The time window for temperature relaxation must be > 0 + + E: Fix langevin omega requires atom style sphere + + Self-explanatory. + + E: Fix langevin angmom requires atom style ellipsoid + + Self-explanatory. + + E: Variable name for fix langevin does not exist + + Self-explanatory. 
+ + E: Variable for fix langevin is invalid style + + It must be an equal-style variable. + + E: Fix langevin omega requires extended particles + + One of the particles has radius 0.0. + + E: Fix langevin angmom requires extended particles + + This fix option cannot be used with point particles. + + E: Cannot zero Langevin force of 0 atoms + + The group has zero atoms, so you cannot request its force + be zeroed. + + E: Fix langevin variable returned negative temperature + + Self-explanatory. + + E: Could not find fix_modify temperature ID + + The compute ID for computing temperature does not exist. + + E: Fix_modify temperature ID does not compute temperature + + The compute ID assigned to the fix must compute temperature. + + W: Group for fix_modify temp != fix group + + The fix_modify command is specifying a temperature computation that + computes a temperature on a different group of atoms than the fix + itself operates on. This is probably not what you want to do. + +*/ diff --git a/src/KOKKOS/fix_nve_kokkos.cpp b/src/KOKKOS/fix_nve_kokkos.cpp index 3076dca4fa..6f7f1e6300 100644 --- a/src/KOKKOS/fix_nve_kokkos.cpp +++ b/src/KOKKOS/fix_nve_kokkos.cpp @@ -172,6 +172,6 @@ void FixNVEKokkos<DeviceType>::cleanup_copy() } template class FixNVEKokkos<LMPDeviceType>; -#if DEVICE==2 +#ifdef KOKKOS_HAVE_CUDA template class FixNVEKokkos<LMPHostType>; #endif diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index b73e25dbc6..5ddd1bac60 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -23,8 +23,6 @@ using namespace LAMMPS_NS; -enum{FULL,HALFTHREAD,HALF}; - /* ---------------------------------------------------------------------- */ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) @@ -49,13 +47,13 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) strcmp(arg[iarg],"gpus") == 0) { if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args"); int ngpu = atoi(arg[iarg+1]); - iarg += 2; int
skip_gpu = 9999; if (iarg+2 < narg && isdigit(arg[iarg+2][0])) { skip_gpu = atoi(arg[iarg+2]); iarg++; } + iarg += 2; char *str; if (str = getenv("SLURM_LOCALID")) { @@ -89,7 +87,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) // initialize Kokkos -#if DEVICE==2 +#ifdef KOKKOS_HAVE_CUDA Kokkos::Cuda::host_mirror_device_type::initialize(num_threads,numa); Kokkos::Cuda::SelectDevice select_device(device); Kokkos::Cuda::initialize(select_device); @@ -112,7 +110,7 @@ KokkosLMP::~KokkosLMP() { // finalize Kokkos -#if DEVICE==2 +#ifdef KOKKOS_HAVE_CUDA Kokkos::Cuda::finalize(); Kokkos::Cuda::host_mirror_device_type::finalize(); #else diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index d115ed0bb9..4d62ec747d 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -14,71 +14,43 @@ #ifndef LMP_LMPTYPE_KOKKOS_H #define LMP_LMPTYPE_KOKKOS_H -#include <Kokkos_View.hpp> -#include <Kokkos_Macros.hpp> -#include <Kokkos_Atomic.hpp> +#include <Kokkos_Core.hpp> #include <Kokkos_DualView.hpp> #include <impl/Kokkos_Timer.hpp> #include <Kokkos_Vectorization.hpp> #define MAX_TYPES_STACKPARAMS 12 #define NeighClusterSize 8 -// set LMPHostype and LMPDeviceType -#ifndef DEVICE -#define DEVICE 1 +#ifndef __CUDACC__ + struct double2 { + double x, y; + }; + struct float2 { + float x, y; + }; + struct double4 { + double x, y, z, w; + }; + struct float4 { + float x, y, z, w; + }; #endif -#if DEVICE==1 - #ifdef KOKKOS_HAVE_OPENMP - #include "Kokkos_OpenMP.hpp" - typedef Kokkos::OpenMP LMPDeviceType; - typedef Kokkos::OpenMP LMPHostType; - #else - #include "Kokkos_Threads.hpp" - typedef Kokkos::Threads LMPDeviceType; - typedef Kokkos::Threads LMPHostType; - #endif - #ifndef __CUDACC__ - struct double2 { - double x, y; - }; - struct float2 { - float x, y; - }; - struct double4 { - double x, y, z, w; - }; - struct float4 { - float x, y, z, w; - }; - #endif -#else - #include "cuda.h" - #include "cuda_runtime.h" - #include 
"Kokkos_Cuda.hpp" - #include "Kokkos_Threads.hpp" - typedef Kokkos::Cuda LMPDeviceType; - typedef Kokkos::Cuda::host_mirror_device_type LMPHostType; -#endif +// set LMPHostype and LMPDeviceType from Kokkos Default Types +typedef Kokkos::DefaultExecutionSpace LMPDeviceType; +typedef Kokkos::DefaultExecutionSpace::host_mirror_device_type LMPHostType; // set ExecutionSpace stuct with variable "space" template<class Device> struct ExecutionSpaceFromDevice; -#ifdef KOKKOS_HAVE_OPENMP -template<> -struct ExecutionSpaceFromDevice<Kokkos::OpenMP> { - static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Host; -}; -#else template<> -struct ExecutionSpaceFromDevice<Kokkos::Threads> { +struct ExecutionSpaceFromDevice<LMPHostType> { static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Host; }; -#endif -#if DEVICE==2 +#ifdef KOKKOS_HAVE_CUDA template<> struct ExecutionSpaceFromDevice<Kokkos::Cuda> { static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Device; @@ -142,16 +114,27 @@ struct s_EV_FLOAT { } KOKKOS_INLINE_FUNCTION - s_EV_FLOAT& operator+=(const s_EV_FLOAT &rhs) { - evdwl += rhs.evdwl; - ecoul += rhs.ecoul; - v[0] += rhs.v[0]; - v[1] += rhs.v[1]; - v[2] += rhs.v[2]; - v[3] += rhs.v[3]; - v[4] += rhs.v[4]; - v[5] += rhs.v[5]; - return *this; + void operator+=(const s_EV_FLOAT &rhs) { + evdwl += rhs.evdwl; + ecoul += rhs.ecoul; + v[0] += rhs.v[0]; + v[1] += rhs.v[1]; + v[2] += rhs.v[2]; + v[3] += rhs.v[3]; + v[4] += rhs.v[4]; + v[5] += rhs.v[5]; + } + + KOKKOS_INLINE_FUNCTION + void operator+=(const volatile s_EV_FLOAT &rhs) volatile { + evdwl += rhs.evdwl; + ecoul += rhs.ecoul; + v[0] += rhs.v[0]; + v[1] += rhs.v[1]; + v[2] += rhs.v[2]; + v[3] += rhs.v[3]; + v[4] += rhs.v[4]; + v[5] += rhs.v[5]; } }; typedef struct s_EV_FLOAT EV_FLOAT; @@ -240,7 +223,7 @@ typedef tdual_int_2d::t_dev_const_um t_int_2d_const_um; typedef tdual_int_2d::t_dev_const_randomread t_int_2d_randomread; typedef Kokkos:: - DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, 
LMPDeviceType> + DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, LMPDeviceType> tdual_tagint_1d; typedef tdual_tagint_1d::t_dev t_tagint_1d; typedef tdual_tagint_1d::t_dev_const t_tagint_1d_const; @@ -249,7 +232,16 @@ typedef tdual_tagint_1d::t_dev_const_um t_tagint_1d_const_um; typedef tdual_tagint_1d::t_dev_const_randomread t_tagint_1d_randomread; typedef Kokkos:: - DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType> + DualView<LAMMPS_NS::tagint**, LMPDeviceType::array_layout, LMPDeviceType> + tdual_tagint_2d; +typedef tdual_tagint_2d::t_dev t_tagint_2d; +typedef tdual_tagint_2d::t_dev_const t_tagint_2d_const; +typedef tdual_tagint_2d::t_dev_um t_tagint_2d_um; +typedef tdual_tagint_2d::t_dev_const_um t_tagint_2d_const_um; +typedef tdual_tagint_2d::t_dev_const_randomread t_tagint_2d_randomread; + +typedef Kokkos:: + DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType> tdual_imageint_1d; typedef tdual_imageint_1d::t_dev t_imageint_1d; typedef tdual_imageint_1d::t_dev_const t_imageint_1d_const; @@ -257,6 +249,22 @@ typedef tdual_imageint_1d::t_dev_um t_imageint_1d_um; typedef tdual_imageint_1d::t_dev_const_um t_imageint_1d_const_um; typedef tdual_imageint_1d::t_dev_const_randomread t_imageint_1d_randomread; +typedef Kokkos:: + DualView<double*, Kokkos::LayoutRight, LMPDeviceType> tdual_double_1d; +typedef tdual_double_1d::t_dev t_double_1d; +typedef tdual_double_1d::t_dev_const t_double_1d_const; +typedef tdual_double_1d::t_dev_um t_double_1d_um; +typedef tdual_double_1d::t_dev_const_um t_double_1d_const_um; +typedef tdual_double_1d::t_dev_const_randomread t_double_1d_randomread; + +typedef Kokkos:: + DualView<double**, Kokkos::LayoutRight, LMPDeviceType> tdual_double_2d; +typedef tdual_double_2d::t_dev t_double_2d; +typedef tdual_double_2d::t_dev_const t_double_2d_const; +typedef tdual_double_2d::t_dev_um t_double_2d_um; +typedef tdual_double_2d::t_dev_const_um t_double_2d_const_um; +typedef 
tdual_double_2d::t_dev_const_randomread t_double_2d_randomread; + // 1d float array n typedef Kokkos::DualView<LMP_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_1d; @@ -406,7 +414,7 @@ typedef tdual_neighbors_2d::t_dev_const_randomread t_neighbors_2d_randomread; }; -#if DEVICE==2 +#ifdef KOKKOS_HAVE_CUDA template <> struct ArrayTypes<LMPHostType> { @@ -446,13 +454,40 @@ typedef tdual_tagint_1d::t_host_um t_tagint_1d_um; typedef tdual_tagint_1d::t_host_const_um t_tagint_1d_const_um; typedef tdual_tagint_1d::t_host_const_randomread t_tagint_1d_randomread; -typedef Kokkos::DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType> tdual_imageint_1d; +typedef Kokkos:: + DualView<LAMMPS_NS::tagint**, LMPDeviceType::array_layout, LMPDeviceType> + tdual_tagint_2d; +typedef tdual_tagint_2d::t_host t_tagint_2d; +typedef tdual_tagint_2d::t_host_const t_tagint_2d_const; +typedef tdual_tagint_2d::t_host_um t_tagint_2d_um; +typedef tdual_tagint_2d::t_host_const_um t_tagint_2d_const_um; +typedef tdual_tagint_2d::t_host_const_randomread t_tagint_2d_randomread; + +typedef Kokkos:: + DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType> + tdual_imageint_1d; typedef tdual_imageint_1d::t_host t_imageint_1d; typedef tdual_imageint_1d::t_host_const t_imageint_1d_const; typedef tdual_imageint_1d::t_host_um t_imageint_1d_um; typedef tdual_imageint_1d::t_host_const_um t_imageint_1d_const_um; typedef tdual_imageint_1d::t_host_const_randomread t_imageint_1d_randomread; +typedef Kokkos:: + DualView<double*, Kokkos::LayoutRight, LMPDeviceType> tdual_double_1d; +typedef tdual_double_1d::t_host t_double_1d; +typedef tdual_double_1d::t_host_const t_double_1d_const; +typedef tdual_double_1d::t_host_um t_double_1d_um; +typedef tdual_double_1d::t_host_const_um t_double_1d_const_um; +typedef tdual_double_1d::t_host_const_randomread t_double_1d_randomread; + +typedef Kokkos:: + DualView<double**, Kokkos::LayoutRight, LMPDeviceType> 
tdual_double_2d; +typedef tdual_double_2d::t_host t_double_2d; +typedef tdual_double_2d::t_host_const t_double_2d_const; +typedef tdual_double_2d::t_host_um t_double_2d_um; +typedef tdual_double_2d::t_host_const_um t_double_2d_const_um; +typedef tdual_double_2d::t_host_const_randomread t_double_2d_randomread; + //1d float array n typedef Kokkos::DualView<LMP_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_1d; typedef tdual_float_1d::t_host t_float_1d; diff --git a/src/KOKKOS/neigh_full_kokkos.h b/src/KOKKOS/neigh_full_kokkos.h index 9112e5049a..effc1cf622 100644 --- a/src/KOKKOS/neigh_full_kokkos.h +++ b/src/KOKKOS/neigh_full_kokkos.h @@ -13,6 +13,7 @@ #include "atom_kokkos.h" #include "atom_masks.h" +#include "domain_kokkos.h" using namespace LAMMPS_NS; @@ -24,7 +25,7 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list) const int nall = includegroup?atom->nfirst:atom->nlocal; list->grow(nall); - NeighborKokkosExecute<DeviceType> + NeighborKokkosExecute<DeviceType> data(*list, k_cutneighsq.view<DeviceType>(), k_bincount.view<DeviceType>(), @@ -33,14 +34,46 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list) atomKK->k_type.view<DeviceType>(), atomKK->k_mask.view<DeviceType>(), atomKK->k_molecule.view<DeviceType>(), + atomKK->k_tag.view<DeviceType>(), + atomKK->k_special.view<DeviceType>(), + atomKK->k_nspecial.view<DeviceType>(), + atomKK->molecular, nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo, bininvx,bininvy,bininvz, - bboxhi,bboxlo); + exclude, nex_type,maxex_type, + k_ex1_type.view<DeviceType>(), + k_ex2_type.view<DeviceType>(), + k_ex_type.view<DeviceType>(), + nex_group,maxex_group, + k_ex1_group.view<DeviceType>(), + k_ex2_group.view<DeviceType>(), + k_ex1_bit.view<DeviceType>(), + k_ex2_bit.view<DeviceType>(), + nex_mol, maxex_mol, + k_ex_mol_group.view<DeviceType>(), + k_ex_mol_bit.view<DeviceType>(), + bboxhi,bboxlo, + domain->xperiodic,domain->yperiodic,domain->zperiodic, + 
domain->xprd_half,domain->yprd_half,domain->zprd_half); k_cutneighsq.sync<DeviceType>(); - atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK); + k_ex1_type.sync<DeviceType>(); + k_ex2_type.sync<DeviceType>(); + k_ex_type.sync<DeviceType>(); + k_ex1_group.sync<DeviceType>(); + k_ex2_group.sync<DeviceType>(); + k_ex1_bit.sync<DeviceType>(); + k_ex2_bit.sync<DeviceType>(); + k_ex_mol_group.sync<DeviceType>(); + k_ex_mol_bit.sync<DeviceType>(); + atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK); Kokkos::deep_copy(list->d_stencil,list->h_stencil); + data.special_flag[0] = special_flag[0]; + data.special_flag[1] = special_flag[1]; + data.special_flag[2] = special_flag[2]; + data.special_flag[3] = special_flag[3]; + while(data.h_resize() > 0) { data.h_resize() = 0; deep_copy(data.resize, data.h_resize); @@ -78,24 +111,24 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list) Kokkos::deep_copy(data.resize, data.h_resize); Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs); -#if DEVICE==2 +#ifdef KOKKOS_HAVE_CUDA #define BINS_PER_BLOCK 2 const int factor = atoms_per_bin<64?2:1; - Kokkos::ParallelWorkRequest config((mbins+factor-1)/factor,atoms_per_bin*factor); + Kokkos::TeamPolicy<DeviceType> config((mbins+factor-1)/factor,atoms_per_bin*factor); #else const int factor = 1; #endif if(newton_pair) { NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); -#if DEVICE==2 +#ifdef KOKKOS_HAVE_CUDA Kokkos::parallel_for(config, f); #else Kokkos::parallel_for(nall, f); #endif } else { NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); -#if DEVICE==2 +#ifdef KOKKOS_HAVE_CUDA Kokkos::parallel_for(config, f); #else Kokkos::parallel_for(nall, f); @@ -134,6 +167,62 @@ void NeighborKokkosExecute<Device>::binatomsItem(const int &i) const } } +/* ---------------------------------------------------------------------- */ 
+template<class Device> +KOKKOS_INLINE_FUNCTION +int NeighborKokkosExecute<Device>::find_special(const int &i, const int &j) const +{ + const int n1 = nspecial(i,0); + const int n2 = nspecial(i,1); + const int n3 = nspecial(i,2); + + for (int k = 0; k < n3; k++) { + if (special(i,k) == tag(j)) { + if (k < n1) { + if (special_flag[1] == 0) return -1; + else if (special_flag[1] == 1) return 0; + else return 1; + } else if (k < n2) { + if (special_flag[2] == 0) return -1; + else if (special_flag[2] == 1) return 0; + else return 2; + } else { + if (special_flag[3] == 0) return -1; + else if (special_flag[3] == 1) return 0; + else return 3; + } + } + } + return 0; +}; + +/* ---------------------------------------------------------------------- */ + +template<class Device> +KOKKOS_INLINE_FUNCTION +int NeighborKokkosExecute<Device>::exclusion(const int &i,const int &j, + const int &itype,const int &jtype) const +{ + int m; + + if (nex_type && ex_type(itype,jtype)) return 1; + + if (nex_group) { + for (m = 0; m < nex_group; m++) { + if (mask(i) & ex1_bit(m) && mask(j) & ex2_bit(m)) return 1; + if (mask(i) & ex2_bit(m) && mask(j) & ex1_bit(m)) return 1; + } + } + + if (nex_mol) { + for (m = 0; m < nex_mol; m++) + if (mask(i) & ex_mol_bit(m) && mask(j) & ex_mol_bit(m) && + molecule(i) == molecule(j)) return 1; + } + + return 0; +} + /* ---------------------------------------------------------------------- */ template<class Device> template<int HalfNeigh,int GhostNewton> @@ -142,7 +231,10 @@ void NeighborKokkosExecute<Device>:: { /* if necessary, goto next page and add pages */ int n = 0; - + int which = 0; + int moltemplate; + if (molecular == 2) moltemplate = 1; + else moltemplate = 0; // get subview of neighbors of i const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i); @@ -161,52 +253,81 @@ void NeighborKokkosExecute<Device>:: if(HalfNeigh) for(int m = 0; m < c_bincount(ibin); m++) { const int j = c_bins(ibin,m); - // printf("%i %i 
%i\n",i,ibin,m,c_bincount(ibin),j); const int jtype = type(j); + //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists if((j == i) || (HalfNeigh && !GhostNewton && (j < i)) || (HalfNeigh && GhostNewton && ((j < i) || ((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) || (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp))))) ) continue; - //if(Exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; - + if(exclude && exclusion(i,j,itype,jtype)) continue; const X_FLOAT delx = xtmp - x(j, 0); const X_FLOAT dely = ytmp - x(j, 1); const X_FLOAT delz = ztmp - x(j, 2); const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; if(rsq <= cutneighsq(itype,jtype)) { - if(n<neigh_list.maxneighs) neighbors_i(n) = j; - n++; + if (molecular) { + if (!moltemplate) + which = find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0){ + if(n<neigh_list.maxneighs) neighbors_i(n++) = j; + }else if (minimum_image_check(delx,dely,delz)){ + if(n<neigh_list.maxneighs) neighbors_i(n++) = j; + } + else if (which > 0) { + if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS); + } + } else { + if(n<neigh_list.maxneighs) neighbors_i(n++) = j; + } } } for(int k = 0; k < nstencil; k++) { const int jbin = ibin + stencil[k]; // get subview of jbin - if(!GhostNewton&&HalfNeigh&&(ibin==jbin)) continue; + if(HalfNeigh&&(ibin==jbin)) continue; //const ArrayTypes<Device>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL); for(int m = 0; m < c_bincount(jbin); m++) { const int j = c_bins(jbin,m); - //if(i==0) - //printf("%i %i %i %i %i %i %i\n",i,jbin,m,c_bincount(jbin),j,k,stencil[k]); const int jtype = type(j); if(HalfNeigh && !GhostNewton && (j < i)) continue; if(!HalfNeigh && j==i) continue; - //if(Exclude && 
exclusion(i,j,itype,jtype,mask,molecule)) continue; + if(exclude && exclusion(i,j,itype,jtype)) continue; const X_FLOAT delx = xtmp - x(j, 0); const X_FLOAT dely = ytmp - x(j, 1); const X_FLOAT delz = ztmp - x(j, 2); const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; - //if(i==0) - //printf("%i %i %lf %lf NEIGHS\n",i,j,rsq,cutneighsq(itype,jtype)); if(rsq <= cutneighsq(itype,jtype)) { - if(n<neigh_list.maxneighs) neighbors_i(n) = j; - n++; + if (molecular) { + if (!moltemplate) + which = find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0){ + if(n<neigh_list.maxneighs) neighbors_i(n++) = j; + }else if (minimum_image_check(delx,dely,delz)){ + if(n<neigh_list.maxneighs) neighbors_i(n++) = j; + } + else if (which > 0) { + if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS); + } + } else { + if(n<neigh_list.maxneighs) neighbors_i(n++) = j; + } } } @@ -222,23 +343,23 @@ void NeighborKokkosExecute<Device>:: neigh_list.d_ilist(i) = i; } -#if DEVICE==2 +#ifdef KOKKOS_HAVE_CUDA extern __shared__ X_FLOAT sharedmem[]; /* ---------------------------------------------------------------------- */ -template<class DeviceType> template<int HalfNeigh> +template<class DeviceType> template<int HalfNeigh,int GhostNewton> __device__ inline -void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const +void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const { /* loop over atoms in i's bin, */ const int atoms_per_bin = c_bins.dimension_1(); - const int BINS_PER_TEAM = blockDim.x/atoms_per_bin; - const int MY_BIN = threadIdx.x/atoms_per_bin; - const int MY_II = threadIdx.x%atoms_per_bin; + const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin<1?1:dev.team_size()/atoms_per_bin; + const int TEAMS_PER_BIN = 
atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size(); + const int MY_BIN = dev.team_rank()/atoms_per_bin; - const int ibin = (blockIdx.x)*BINS_PER_TEAM+MY_BIN; + const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN; if(ibin >=c_bincount.dimension_0()) return; X_FLOAT* other_x = sharedmem; @@ -248,6 +369,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const int bincount_current = c_bincount[ibin]; + for(int kk = 0; kk < TEAMS_PER_BIN; kk++) { + const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size(); const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1; /* if necessary, goto next page and add pages */ @@ -278,17 +401,45 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const #pragma unroll 4 for(int m = 0; m < bincount_current; m++) { int j = other_id[m]; + const int jtype = other_x[m + 3 * atoms_per_bin]; //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists - //if(j==i) continue; - if((j == i) || (HalfNeigh && (j < i))) continue; - + if((j == i) || + (HalfNeigh && !GhostNewton && (j < i)) || + (HalfNeigh && GhostNewton && + ((j < i) || + ((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) || + (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp))))) + ) continue; + if(exclude && exclusion(i,j,itype,jtype)) continue; const X_FLOAT delx = xtmp - other_x[m]; const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; - const int jtype = other_x[m + 3 * atoms_per_bin]; const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; - if((rsq <= cutneighsq(itype,jtype)) && (n < neigh_list.maxneighs)) neighbors_i(n++) = j; + + if(rsq <= cutneighsq(itype,jtype)) { + if (molecular) { + int which = 0; + if (!moltemplate) + which = find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* 
onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0){ + if(n<neigh_list.maxneighs) neighbors_i(n++) = j; + }else if (minimum_image_check(delx,dely,delz)){ + if(n<neigh_list.maxneighs) neighbors_i(n++) = j; + } + else if (which > 0) { + if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS); + } + } else { + if(n<neigh_list.maxneighs) neighbors_i(n++) = j; + } + } + } } __syncthreads(); @@ -319,15 +470,41 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const #pragma unroll 8 for(int m = 0; m < bincount_current; m++) { const int j = other_id[m]; + const int jtype = other_x[m + 3 * atoms_per_bin]; - if(HalfNeigh && (j < i)) continue; + //if(HalfNeigh && (j < i)) continue; + if(HalfNeigh && !GhostNewton && (j < i)) continue; + if(!HalfNeigh && j==i) continue; + if(exclude && exclusion(i,j,itype,jtype)) continue; const X_FLOAT delx = xtmp - other_x[m]; const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; - const int jtype = other_x[m + 3 * atoms_per_bin]; const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; - if((rsq <= cutneighsq(itype,jtype)) && (n < neigh_list.maxneighs)) neighbors_i(n++) = j; + + if(rsq <= cutneighsq(itype,jtype)) { + if (molecular) { + int which = 0; + if (!moltemplate) + which = find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0){ + if(n<neigh_list.maxneighs) neighbors_i(n++) = j; + }else if (minimum_image_check(delx,dely,delz)){ + if(n<neigh_list.maxneighs) neighbors_i(n++) = j; + } + else if (which > 0) { + if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS); + } + } else { + if(n<neigh_list.maxneighs) neighbors_i(n++) = j; + } + } + } } __syncthreads(); @@ -343,6 +520,7 @@ void 
NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const if(n >= new_maxneighs()) new_maxneighs() = n; } + } } #endif @@ -361,12 +539,45 @@ void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list) atomKK->k_type.view<DeviceType>(), atomKK->k_mask.view<DeviceType>(), atomKK->k_molecule.view<DeviceType>(), + atomKK->k_tag.view<DeviceType>(), + atomKK->k_special.view<DeviceType>(), + atomKK->k_nspecial.view<DeviceType>(), + atomKK->molecular, nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo, bininvx,bininvy,bininvz, - bboxhi,bboxlo); + exclude, nex_type,maxex_type, + k_ex1_type.view<DeviceType>(), + k_ex2_type.view<DeviceType>(), + k_ex_type.view<DeviceType>(), + nex_group,maxex_group, + k_ex1_group.view<DeviceType>(), + k_ex2_group.view<DeviceType>(), + k_ex1_bit.view<DeviceType>(), + k_ex2_bit.view<DeviceType>(), + nex_mol, maxex_mol, + k_ex_mol_group.view<DeviceType>(), + k_ex_mol_bit.view<DeviceType>(), + bboxhi,bboxlo, + domain->xperiodic,domain->yperiodic,domain->zperiodic, + domain->xprd_half,domain->yprd_half,domain->zprd_half); k_cutneighsq.sync<DeviceType>(); - atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK); + k_ex1_type.sync<DeviceType>(); + k_ex2_type.sync<DeviceType>(); + k_ex_type.sync<DeviceType>(); + k_ex1_group.sync<DeviceType>(); + k_ex2_group.sync<DeviceType>(); + k_ex1_bit.sync<DeviceType>(); + k_ex2_bit.sync<DeviceType>(); + k_ex_mol_group.sync<DeviceType>(); + k_ex_mol_bit.sync<DeviceType>(); + + data.special_flag[0] = special_flag[0]; + data.special_flag[1] = special_flag[1]; + data.special_flag[2] = special_flag[2]; + data.special_flag[3] = special_flag[3]; + + atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK); Kokkos::deep_copy(list->d_stencil,list->h_stencil); DeviceType::fence(); @@ -407,24 +618,24 @@ void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list) Kokkos::deep_copy(data.resize, data.h_resize); 
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs); -#if DEVICE==2 +#ifdef KOKKOS_HAVE_CUDA #define BINS_PER_BLOCK 2 const int factor = atoms_per_bin<64?2:1; - Kokkos::ParallelWorkRequest config((mbins+factor-1)/factor,atoms_per_bin*factor); + Kokkos::TeamPolicy<DeviceType> config((mbins+factor-1)/factor,atoms_per_bin*factor); #else const int factor = 1; #endif if(newton_pair) { NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); -//#if DEVICE==2 +//#ifdef KOKKOS_HAVE_CUDA // Kokkos::parallel_for(config, f); //#else Kokkos::parallel_for(nall, f); //#endif } else { NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); -//#if DEVICE==2 +//#ifdef KOKKOS_HAVE_CUDA // Kokkos::parallel_for(config, f); //#else Kokkos::parallel_for(nall, f); diff --git a/src/KOKKOS/neigh_list_kokkos.cpp b/src/KOKKOS/neigh_list_kokkos.cpp index dbb0aa5727..2730c15a2b 100644 --- a/src/KOKKOS/neigh_list_kokkos.cpp +++ b/src/KOKKOS/neigh_list_kokkos.cpp @@ -113,6 +113,6 @@ void NeighListKokkos<Device>::stencil_allocate(int smax, int style) } template class NeighListKokkos<LMPDeviceType>; -#if DEVICE==2 +#ifdef KOKKOS_HAVE_CUDA template class NeighListKokkos<LMPHostType>; #endif diff --git a/src/KOKKOS/neigh_list_kokkos.h b/src/KOKKOS/neigh_list_kokkos.h index fd4ac3acc9..fa0eaba76a 100644 --- a/src/KOKKOS/neigh_list_kokkos.h +++ b/src/KOKKOS/neigh_list_kokkos.h @@ -20,7 +20,7 @@ namespace LAMMPS_NS { -enum{FULL,HALFTHREAD,HALF,N2,FULLCLUSTER}; +enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u,FULLCLUSTER=16u}; class AtomNeighbors { diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp index adea823976..133ac7cea0 100644 --- a/src/KOKKOS/neighbor_kokkos.cpp +++ b/src/KOKKOS/neighbor_kokkos.cpp @@ -51,6 +51,16 @@ NeighborKokkos::~NeighborKokkos() delete [] pair_build_device; delete [] pair_build_host; + + 
memory->destroy_kokkos(k_ex1_type,ex1_type); + memory->destroy_kokkos(k_ex2_type,ex2_type); + memory->destroy_kokkos(k_ex1_group,ex1_group); + memory->destroy_kokkos(k_ex2_group,ex2_group); + memory->destroy_kokkos(k_ex_mol_group,ex_mol_group); + memory->destroy_kokkos(k_ex1_bit,ex1_bit); + memory->destroy_kokkos(k_ex2_bit,ex2_bit); + memory->destroy_kokkos(k_ex_mol_bit,ex_mol_bit); + } /* ---------------------------------------------------------------------- */ @@ -72,7 +82,7 @@ void NeighborKokkos::init_cutneighsq_kokkos(int n) /* ---------------------------------------------------------------------- */ int NeighborKokkos::init_lists_kokkos() -{ +{ int i; for (i = 0; i < nlist_host; i++) delete lists_host[i]; @@ -211,6 +221,32 @@ void NeighborKokkos::init_list_grow_kokkos(int i) /* ---------------------------------------------------------------------- */ +void NeighborKokkos::init_ex_type_kokkos(int n) +{ + memory->create_kokkos(k_ex_type,ex_type,n+1,n+1,"neigh:ex_type"); + k_ex_type.modify<LMPHostType>(); +} + +/* ---------------------------------------------------------------------- */ + +void NeighborKokkos::init_ex_bit_kokkos() +{ + memory->create_kokkos(k_ex1_bit, ex1_bit, nex_group, "neigh:ex1_bit"); + k_ex1_bit.modify<LMPHostType>(); + memory->create_kokkos(k_ex2_bit, ex2_bit, nex_group, "neigh:ex2_bit"); + k_ex2_bit.modify<LMPHostType>(); +} + +/* ---------------------------------------------------------------------- */ + +void NeighborKokkos::init_ex_mol_bit_kokkos() +{ + memory->create_kokkos(k_ex_mol_bit, ex_mol_bit, nex_mol, "neigh:ex_mol_bit"); + k_ex_mol_bit.modify<LMPHostType>(); +} + +/* ---------------------------------------------------------------------- */ + void NeighborKokkos::choose_build(int index, NeighRequest *rq) { if (rq->kokkos_host != 0) { @@ -264,6 +300,29 @@ void NeighborKokkos::setup_bins_kokkos(int i) } } +/* ---------------------------------------------------------------------- */ + +void 
NeighborKokkos::modify_ex_type_grow_kokkos(){ + memory->grow_kokkos(k_ex1_type,ex1_type,maxex_type,"neigh:ex1_type"); + k_ex1_type.modify<LMPHostType>(); + memory->grow_kokkos(k_ex2_type,ex2_type,maxex_type,"neigh:ex2_type"); + k_ex2_type.modify<LMPHostType>(); +} + +/* ---------------------------------------------------------------------- */ +void NeighborKokkos::modify_ex_group_grow_kokkos(){ + memory->grow_kokkos(k_ex1_group,ex1_group,maxex_group,"neigh:ex1_group"); + k_ex1_group.modify<LMPHostType>(); + memory->grow_kokkos(k_ex2_group,ex2_group,maxex_group,"neigh:ex2_group"); + k_ex2_group.modify<LMPHostType>(); +} + +/* ---------------------------------------------------------------------- */ +void NeighborKokkos::modify_mol_group_grow_kokkos(){ + memory->grow_kokkos(k_ex_mol_group,ex_mol_group,maxex_mol,"neigh:ex_mol_group"); + k_ex_mol_group.modify<LMPHostType>(); +} + // include to trigger instantiation of templated functions #include "neigh_full_kokkos.h" diff --git a/src/KOKKOS/neighbor_kokkos.h b/src/KOKKOS/neighbor_kokkos.h index 9abbd00738..3736f1405b 100644 --- a/src/KOKKOS/neighbor_kokkos.h +++ b/src/KOKKOS/neighbor_kokkos.h @@ -17,6 +17,7 @@ #include "neighbor.h" #include "neigh_list_kokkos.h" #include "kokkos_type.h" +#include <math.h> namespace LAMMPS_NS { @@ -33,8 +34,15 @@ class NeighborKokkosExecute typename AT::t_int_2d bins; typename AT::t_int_2d_const c_bins; const typename AT::t_x_array_randomread x; - const typename AT::t_int_1d_const type,mask; - const typename AT::t_tagint_1d_const molecule; + const typename AT::t_int_1d_const type,mask,molecule; + + const typename AT::t_tagint_1d_const tag; + const typename AT::t_tagint_2d_const special; + const typename AT::t_int_2d_const nspecial; + const int molecular; + int moltemplate; + + int special_flag[4]; const int nbinx,nbiny,nbinz; const int mbinx,mbiny,mbinz; @@ -44,38 +52,88 @@ class NeighborKokkosExecute const int nlocal; + const int exclude; + + const int nex_type; + const int 
maxex_type; + const typename AT::t_int_1d_const ex1_type,ex2_type; + const typename AT::t_int_2d_const ex_type; + + const int nex_group; + const int maxex_group; + const typename AT::t_int_1d_const ex1_group,ex2_group; + const typename AT::t_int_1d_const ex1_bit,ex2_bit; + + const int nex_mol; + const int maxex_mol; + const typename AT::t_int_1d_const ex_mol_group; + const typename AT::t_int_1d_const ex_mol_bit; + typename AT::t_int_scalar resize; typename AT::t_int_scalar new_maxneighs; typename ArrayTypes<LMPHostType>::t_int_scalar h_resize; typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs; + const int xperiodic, yperiodic, zperiodic; + const X_FLOAT xprd_half, yprd_half, zprd_half; /* real-valued: compared with fabs(dx) in minimum_image_check(); an int here truncated the value passed from domain->xprd_half etc. (presumably double - confirm) */ + NeighborKokkosExecute( - const NeighListKokkos<Device> &_neigh_list, - const typename AT::t_xfloat_2d_randomread &_cutneighsq, - const typename AT::t_int_1d &_bincount, - const typename AT::t_int_2d &_bins, - const int _nlocal, - const typename AT::t_x_array_randomread &_x, - const typename AT::t_int_1d_const &_type, - const typename AT::t_int_1d_const &_mask, - const typename AT::t_tagint_1d_const &_molecule, - const int & _nbinx,const int & _nbiny,const int & _nbinz, - const int & _mbinx,const int & _mbiny,const int & _mbinz, - const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo, - const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz, - const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo): + const NeighListKokkos<Device> &_neigh_list, + const typename AT::t_xfloat_2d_randomread &_cutneighsq, + const typename AT::t_int_1d &_bincount, + const typename AT::t_int_2d &_bins, + const int _nlocal, + const typename AT::t_x_array_randomread &_x, + const typename AT::t_int_1d_const &_type, + const typename AT::t_int_1d_const &_mask, + const typename AT::t_int_1d_const &_molecule, + const typename AT::t_tagint_1d_const &_tag, + const typename AT::t_tagint_2d_const &_special, + const typename AT::t_int_2d_const &_nspecial, + const int &_molecular, + const 
int & _nbinx,const int & _nbiny,const int & _nbinz, + const int & _mbinx,const int & _mbiny,const int & _mbinz, + const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo, + const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz, + const int & _exclude,const int & _nex_type,const int & _maxex_type, + const typename AT::t_int_1d_const & _ex1_type, + const typename AT::t_int_1d_const & _ex2_type, + const typename AT::t_int_2d_const & _ex_type, + const int & _nex_group,const int & _maxex_group, + const typename AT::t_int_1d_const & _ex1_group, + const typename AT::t_int_1d_const & _ex2_group, + const typename AT::t_int_1d_const & _ex1_bit, + const typename AT::t_int_1d_const & _ex2_bit, + const int & _nex_mol,const int & _maxex_mol, + const typename AT::t_int_1d_const & _ex_mol_group, + const typename AT::t_int_1d_const & _ex_mol_bit, + const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo, + const int & _xperiodic, const int & _yperiodic, const int & _zperiodic, + const X_FLOAT & _xprd_half, const X_FLOAT & _yprd_half, const X_FLOAT & _zprd_half): neigh_list(_neigh_list), cutneighsq(_cutneighsq), bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins), nlocal(_nlocal), x(_x),type(_type),mask(_mask),molecule(_molecule), + tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular), nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz), mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz), mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo), - bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz) { + bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz), + exclude(_exclude),nex_type(_nex_type),maxex_type(_maxex_type), + ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type), + nex_group(_nex_group),maxex_group(_maxex_group), + ex1_group(_ex1_group),ex2_group(_ex2_group), + ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),maxex_mol(_maxex_mol), + ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit), + 
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic), + xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half){ + + if (molecular == 2) moltemplate = 1; + else moltemplate = 0; bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2]; bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2]; - + resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize"); #ifndef KOKKOS_USE_UVM h_resize = Kokkos::create_mirror_view(resize); @@ -103,10 +161,10 @@ class NeighborKokkosExecute KOKKOS_FUNCTION void build_cluster_Item(const int &i) const; -#if DEVICE==2 - template<int HalfNeigh> +#ifdef KOKKOS_HAVE_CUDA + template<int HalfNeigh, int GhostNewton> __device__ inline - void build_ItemCuda(Device dev) const; + void build_ItemCuda(typename Kokkos::TeamPolicy<Device>::member_type dev) const; #endif KOKKOS_INLINE_FUNCTION @@ -143,6 +201,21 @@ class NeighborKokkosExecute return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); } + + KOKKOS_INLINE_FUNCTION + int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const; + + KOKKOS_INLINE_FUNCTION + int find_special(const int &i, const int &j) const; + + KOKKOS_INLINE_FUNCTION + int minimum_image_check(double dx, double dy, double dz) const { + if (xperiodic && fabs(dx) > xprd_half) return 1; + if (yperiodic && fabs(dy) > yprd_half) return 1; + if (zperiodic && fabs(dz) > zprd_half) return 1; + return 0; + } + }; template<class Device> @@ -175,12 +248,12 @@ struct NeighborKokkosBuildFunctor { void operator() (const int & i) const { c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i); } -#if DEVICE==2 +#ifdef KOKKOS_HAVE_CUDA KOKKOS_INLINE_FUNCTION - void operator() (Device dev) const { - c.template build_ItemCuda<HALF_NEIGH>(dev); + void operator() (typename Kokkos::TeamPolicy<Device>::member_type dev) const { + c.template build_ItemCuda<HALF_NEIGH,GHOST_NEWTON>(dev); } - size_t shmem_size() const { return sharedsize; } + size_t 
shmem_size(const int team_size) const { (void) team_size; return sharedsize; } #endif }; @@ -220,15 +293,28 @@ class NeighborKokkos : public Neighbor { DAT::tdual_int_1d k_bincount; DAT::tdual_int_2d k_bins; + DAT::tdual_int_1d k_ex1_type,k_ex2_type; + DAT::tdual_int_2d k_ex_type; + DAT::tdual_int_1d k_ex1_group,k_ex2_group; + DAT::tdual_int_1d k_ex1_bit,k_ex2_bit; + DAT::tdual_int_1d k_ex_mol_group; + DAT::tdual_int_1d k_ex_mol_bit; + void init_cutneighsq_kokkos(int); int init_lists_kokkos(); void init_list_flags1_kokkos(int); void init_list_flags2_kokkos(int); void init_list_grow_kokkos(int); + void init_ex_type_kokkos(int); + void init_ex_bit_kokkos(); + void init_ex_mol_bit_kokkos(); void choose_build(int, NeighRequest *); void build_kokkos(int); void setup_bins_kokkos(int); - + void modify_ex_type_grow_kokkos(); + void modify_ex_group_grow_kokkos(); + void modify_mol_group_grow_kokkos(); + typedef void (NeighborKokkos::*PairPtrHost) (class NeighListKokkos<LMPHostType> *); PairPtrHost *pair_build_host; diff --git a/src/KOKKOS/pair_coul_cut_kokkos.cpp b/src/KOKKOS/pair_coul_cut_kokkos.cpp new file mode 100644 index 0000000000..494df9267f --- /dev/null +++ b/src/KOKKOS/pair_coul_cut_kokkos.cpp @@ -0,0 +1,266 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_coul_cut_kokkos.h" +#include "kokkos.h" +#include "atom_kokkos.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "update.h" +#include "integrate.h" +#include "respa.h" +#include "math_const.h" +#include "memory.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define KOKKOS_CUDA_MAX_THREADS 256 +#define KOKKOS_CUDA_MIN_BLOCKS 8 + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +PairCoulCutKokkos<DeviceType>::PairCoulCutKokkos(LAMMPS *lmp) : PairCoulCut(lmp) +{ + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice<DeviceType>::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + cutsq = NULL; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +PairCoulCutKokkos<DeviceType>::~PairCoulCutKokkos() +{ + if (allocated) + memory->destroy_kokkos(k_cutsq, cutsq); +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void PairCoulCutKokkos<DeviceType>::cleanup_copy() { + // WHY needed: this prevents parent copy from deallocating any arrays + allocated = 0; + cutsq = NULL; + eatom = NULL; + vatom = NULL; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void PairCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + + if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + + double ecoul = 0.0; + if (eflag || vflag) 
ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + atomKK->sync(execution_space,datamask_read); + k_cutsq.template sync<DeviceType>(); + k_cut_ljsq.template sync<DeviceType>(); + k_cut_coulsq.template sync<DeviceType>(); + k_params.template sync<DeviceType>(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = atomKK->k_x.view<DeviceType>(); + c_x = atomKK->k_x.view<DeviceType>(); + f = atomKK->k_f.view<DeviceType>(); + q = atomKK->k_q.view<DeviceType>(); + type = atomKK->k_type.view<DeviceType>(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + newton_pair = force->newton_pair; + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; + special_coul[0] = force->special_coul[0]; + special_coul[1] = force->special_coul[1]; + special_coul[2] = force->special_coul[2]; + special_coul[3] = force->special_coul[3]; + qqrd2e = force->qqrd2e; + + // loop over neighbors of my atoms + + EV_FLOAT ev = pair_compute<PairCoulCutKokkos<DeviceType>,void > + (this,(NeighListKokkos<DeviceType>*)list); + + DeviceType::fence(); + + if (eflag) eng_coul += ev.ecoul; + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); +} + +template<class DeviceType> +template<bool STACKPARAMS, class Specialisation> +KOKKOS_INLINE_FUNCTION +F_FLOAT PairCoulCutKokkos<DeviceType>:: +compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, + const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const { + const F_FLOAT r2inv = 1.0/rsq; + const F_FLOAT rinv = sqrt(r2inv); + F_FLOAT forcecoul; + + forcecoul = qqrd2e*(STACKPARAMS?m_params[itype][jtype].scale:params(itype,jtype).scale)* + qtmp *q(j) *rinv; 
+ + return factor_coul*forcecoul*r2inv; +} + +template<class DeviceType> +template<bool STACKPARAMS, class Specialisation> +KOKKOS_INLINE_FUNCTION +F_FLOAT PairCoulCutKokkos<DeviceType>:: +compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, + const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const { + const F_FLOAT r2inv = 1.0/rsq; + const F_FLOAT rinv = sqrt(r2inv); + + return factor_coul*qqrd2e * (STACKPARAMS?m_params[itype][jtype].scale:params(itype,jtype).scale) + * qtmp *q(j)*rinv; +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template<class DeviceType> +void PairCoulCutKokkos<DeviceType>::allocate() +{ + PairCoulCut::allocate(); + + int n = atom->ntypes; + memory->destroy(cutsq); + memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + d_cutsq = k_cutsq.template view<DeviceType>(); + + k_cut_ljsq = typename ArrayTypes<DeviceType>::tdual_ffloat_2d("pair:cut_ljsq",n+1,n+1); + d_cut_ljsq = k_cut_ljsq.template view<DeviceType>(); + k_cut_coulsq = typename ArrayTypes<DeviceType>::tdual_ffloat_2d("pair:cut_coulsq",n+1,n+1); + d_cut_coulsq = k_cut_coulsq.template view<DeviceType>(); + + k_params = Kokkos::DualView<params_coul**,Kokkos::LayoutRight,DeviceType>("PairCoulCut::params",n+1,n+1); + params = k_params.d_view; +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +template<class DeviceType> +void PairCoulCutKokkos<DeviceType>::settings(int narg, char **arg) +{ + // \todo check what should be the limit on narg + if (narg > 2) error->all(FLERR,"Illegal pair_style command"); + + PairCoulCut::settings(1,arg); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style 
+------------------------------------------------------------------------- */ + +template<class DeviceType> +void PairCoulCutKokkos<DeviceType>::init_style() +{ + PairCoulCut::init_style(); + + // irequest = neigh request made by parent class + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value && + !Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value; + + if (neighflag == FULL) { + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full_cluster = 0; + } else if (neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 1; + neighbor->requests[irequest]->full_cluster = 0; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with coul/cut/kk"); + } +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +template<class DeviceType> +double PairCoulCutKokkos<DeviceType>::init_one(int i, int j) +{ + double cutone = PairCoulCut::init_one(i,j); + + k_params.h_view(i,j).scale = scale[i][j]; + k_params.h_view(i,j).cutsq = cutone*cutone; + k_params.h_view(j,i) = k_params.h_view(i,j); + + if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) { + m_params[i][j] = m_params[j][i] = k_params.h_view(i,j); + m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone; + m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cutone*cutone; + m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cutone*cutone; + } + k_cutsq.h_view(i,j) = cutone*cutone; + k_cutsq.template modify<LMPHostType>(); + k_cut_ljsq.h_view(i,j) = cutone*cutone; + k_cut_ljsq.template modify<LMPHostType>(); + 
k_cut_coulsq.h_view(i,j) = cutone*cutone; + k_cut_coulsq.template modify<LMPHostType>(); + k_params.template modify<LMPHostType>(); + + return cutone; +} + + + +template class PairCoulCutKokkos<LMPDeviceType>; +#ifdef KOKKOS_HAVE_CUDA +template class PairCoulCutKokkos<LMPHostType>; +#endif diff --git a/src/KOKKOS/pair_coul_cut_kokkos.h b/src/KOKKOS/pair_coul_cut_kokkos.h new file mode 100644 index 0000000000..96e4ca0f63 --- /dev/null +++ b/src/KOKKOS/pair_coul_cut_kokkos.h @@ -0,0 +1,137 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(coul/cut/kk,PairCoulCutKokkos<LMPDeviceType>) +PairStyle(coul/cut/kk/device,PairCoulCutKokkos<LMPDeviceType>) +PairStyle(coul/cut/kk/host,PairCoulCutKokkos<LMPHostType>) + +#else + +#ifndef LMP_PAIR_COUL_CUT_KOKKOS_H +#define LMP_PAIR_COUL_CUT_KOKKOS_H + +#include "pair_kokkos.h" +#include "pair_coul_cut.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template<class DeviceType> +class PairCoulCutKokkos : public PairCoulCut { + public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; /* FULL, HALFTHREAD, HALF are distinct single bits (1,2,4): OR builds the mask, AND would always be 0 */ + enum {COUL_FLAG=1}; + typedef DeviceType device_type; + PairCoulCutKokkos(class LAMMPS *); + ~PairCoulCutKokkos(); + + void compute(int, int); + + void settings(int, char **); + void init_style(); + double init_one(int, int); + + struct params_coul{ + params_coul(){cutsq=0,scale=0;}; + params_coul(int i){cutsq=0,scale=0;}; + F_FLOAT cutsq, scale; + }; + + protected: + void cleanup_copy(); + + template<bool STACKPARAMS, class Specialisation> + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const { + return 0.0; + } + + template<bool STACKPARAMS, class Specialisation> + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const; + + template<bool STACKPARAMS, class Specialisation> + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const{ + return 0; + } + + template<bool STACKPARAMS, class Specialisation> + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const; + + Kokkos::DualView<params_coul**,Kokkos::LayoutRight,DeviceType> k_params; + 
typename Kokkos::DualView<params_coul**, + Kokkos::LayoutRight,DeviceType>::t_dev_const params; + // hardwired to space for 15 atom types + params_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + + F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + typename ArrayTypes<DeviceType>::t_x_array_randomread x; + typename ArrayTypes<DeviceType>::t_x_array c_x; + typename ArrayTypes<DeviceType>::t_f_array f; + typename ArrayTypes<DeviceType>::t_float_1d_randomread q; + typename ArrayTypes<DeviceType>::t_int_1d_randomread type; + typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom; + typename ArrayTypes<DeviceType>::t_virial_array d_vatom; + + int newton_pair; + + typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq; + typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq; + typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_ljsq; + typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_ljsq; + typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_coulsq; + typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_coulsq; + + class AtomKokkos *atomKK; + int neighflag; + int nlocal,nall,eflag,vflag; + + double special_coul[4]; + double special_lj[4]; + double qqrd2e; + + void allocate(); + friend class PairComputeFunctor<PairCoulCutKokkos,FULL,true>; + friend class PairComputeFunctor<PairCoulCutKokkos,HALF,true>; + friend class PairComputeFunctor<PairCoulCutKokkos,HALFTHREAD,true>; + friend class PairComputeFunctor<PairCoulCutKokkos,N2,true>; + friend class PairComputeFunctor<PairCoulCutKokkos,FULLCLUSTER,true >; + friend class PairComputeFunctor<PairCoulCutKokkos,FULL,false>; + friend class PairComputeFunctor<PairCoulCutKokkos,HALF,false>; + friend class PairComputeFunctor<PairCoulCutKokkos,HALFTHREAD,false>; + friend class PairComputeFunctor<PairCoulCutKokkos,N2,false>; + friend class 
PairComputeFunctor<PairCoulCutKokkos,FULLCLUSTER,false >; + friend EV_FLOAT pair_compute<PairCoulCutKokkos,void>(PairCoulCutKokkos*, + NeighListKokkos<DeviceType>*); + friend void pair_virial_fdotr_compute<PairCoulCutKokkos>(PairCoulCutKokkos*); + +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index de67e7df0b..2204399a39 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -25,27 +25,72 @@ namespace LAMMPS_NS { +template<int Table> +struct CoulLongTable { + enum {DoTable = Table}; +}; + +// Tags for doing coulomb calculations or not +// They facilitate function overloading, since +// partial template specialization of member functions is not allowed +struct CoulTag {}; +struct NoCoulTag {}; + +template<int FLAG> +struct DoCoul { + typedef NoCoulTag type; +}; + +template<> +struct DoCoul<1> { + typedef CoulTag type; +}; + +// Determine memory traits for force array +// Do atomic trait when running HALFTHREAD neighbor list style +template<int NEIGHFLAG> +struct AtomicF { + enum {value = Kokkos::Unmanaged}; +}; + +template<> +struct AtomicF<HALFTHREAD> { + enum {value = Kokkos::Atomic|Kokkos::Unmanaged}; +}; + +//Specialisation for Neighborlist types Half, HalfThread, Full template <class PairStyle, int NEIGHFLAG, bool STACKPARAMS, class Specialisation = void> struct PairComputeFunctor { typedef typename PairStyle::device_type device_type ; + + // Reduction type, contains evdwl, ecoul and virial[6] typedef EV_FLOAT value_type; + // The copy of the pair style PairStyle c; + + // The force array is atomic for Half/Thread neighbor style + Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout, + device_type,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > f; NeighListKokkos<device_type> list; PairComputeFunctor(PairStyle* c_ptr, NeighListKokkos<device_type>* list_ptr): - c(*c_ptr),list(*list_ptr) {}; + c(*c_ptr),f(c.f),list(*list_ptr) {}; + + // Call 
cleanup_copy which sets allocations NULL which are destructed by the PairStyle ~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();}; KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const { return j >> SBBITS & 3; } + // Loop over neighbors of one atom without coulomb interaction + // This function is called in parallel template<int EVFLAG, int NEWTON_PAIR> KOKKOS_FUNCTION EV_FLOAT compute_item(const int& ii, - const NeighListKokkos<device_type> &list) const { + const NeighListKokkos<device_type> &list, const NoCoulTag&) const { EV_FLOAT ev; const int i = list.d_ilist[ii]; const X_FLOAT xtmp = c.x(i,0); @@ -77,25 +122,17 @@ struct PairComputeFunctor { fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; - if ((NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) { - Kokkos::atomic_fetch_add(&c.f(j,0),-delx*fpair); - Kokkos::atomic_fetch_add(&c.f(j,1),-dely*fpair); - Kokkos::atomic_fetch_add(&c.f(j,2),-delz*fpair); - } - if ((NEIGHFLAG==HALF) && (NEWTON_PAIR || j < c.nlocal)) { - c.f(j,0) -= delx*fpair; - c.f(j,1) -= dely*fpair; - c.f(j,2) -= delz*fpair; + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) { + f(j,0) -= delx*fpair; + f(j,1) -= dely*fpair; + f(j,2) -= delz*fpair; } if (EVFLAG) { if (c.eflag) { ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)* factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype); - if (c.COUL_FLAG) - ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)* - factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype); } if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz); @@ -103,16 +140,84 @@ struct PairComputeFunctor { } } - if (NEIGHFLAG == HALFTHREAD) { - Kokkos::atomic_fetch_add(&c.f(i,0),fxtmp); - Kokkos::atomic_fetch_add(&c.f(i,1),fytmp); - Kokkos::atomic_fetch_add(&c.f(i,2),fztmp); - } else { - c.f(i,0) += fxtmp; - c.f(i,1) += 
fytmp; - c.f(i,2) += fztmp; + + f(i,0) += fxtmp; + f(i,1) += fytmp; + f(i,2) += fztmp; + + return ev; + } + + // Loop over neighbors of one atom with coulomb interaction + // This function is called in parallel + template<int EVFLAG, int NEWTON_PAIR> + KOKKOS_FUNCTION + EV_FLOAT compute_item(const int& ii, + const NeighListKokkos<device_type> &list, const CoulTag& ) const { + EV_FLOAT ev; + const int i = list.d_ilist[ii]; + const X_FLOAT xtmp = c.x(i,0); + const X_FLOAT ytmp = c.x(i,1); + const X_FLOAT ztmp = c.x(i,2); + const int itype = c.type(i); + const F_FLOAT qtmp = c.q(i); + + const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); + const int jnum = list.d_numneigh[i]; + + F_FLOAT fxtmp = 0.0; + F_FLOAT fytmp = 0.0; + F_FLOAT fztmp = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + int j = neighbors_i(jj); + const F_FLOAT factor_lj = c.special_lj[sbmask(j)]; + const F_FLOAT factor_coul = c.special_coul[sbmask(j)]; + j &= NEIGHMASK; + const X_FLOAT delx = xtmp - c.x(j,0); + const X_FLOAT dely = ytmp - c.x(j,1); + const X_FLOAT delz = ztmp - c.x(j,2); + const int jtype = c.type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) { + + F_FLOAT fpair = F_FLOAT(); + + if(rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype))) + fpair+=factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype); + if(rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) + fpair+=c.template compute_fcoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp); + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) { + f(j,0) -= delx*fpair; + f(j,1) -= dely*fpair; + f(j,2) -= delz*fpair; + } + + if (EVFLAG) { + if (c.eflag) { + if(rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype))) + ev.evdwl 
+= (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)* + factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype); + if(rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) + ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)* + c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp); + } + + if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz); + } + } } + f(i,0) += fxtmp; + f(i,1) += fytmp; + f(i,2) += fztmp; + return ev; } @@ -142,7 +247,7 @@ struct PairComputeFunctor { const E_FLOAT v5 = dely*delz*fpair; if (c.vflag_global) { - if (NEIGHFLAG) { + if (NEIGHFLAG!=FULL) { if (NEWTON_PAIR) { ev.v[0] += v0; ev.v[1] += v1; @@ -202,43 +307,17 @@ struct PairComputeFunctor { KOKKOS_INLINE_FUNCTION void operator()(const int i) const { - if (c.newton_pair) compute_item<0,1>(i,list); - else compute_item<0,0>(i,list); + if (c.newton_pair) compute_item<0,1>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type()); + else compute_item<0,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type()); } KOKKOS_INLINE_FUNCTION void operator()(const int i, value_type &energy_virial) const { if (c.newton_pair) - energy_virial += compute_item<1,1>(i,list); + energy_virial += compute_item<1,1>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type()); else - energy_virial += compute_item<1,0>(i,list); - } - - KOKKOS_INLINE_FUNCTION - static void init(volatile value_type &update) { - update.evdwl = 0; - update.ecoul = 0; - update.v[0] = 0; - update.v[1] = 0; - update.v[2] = 0; - update.v[3] = 0; - update.v[4] = 0; - update.v[5] = 0; + energy_virial += compute_item<1,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type()); } - KOKKOS_INLINE_FUNCTION - static void join(volatile value_type &update, - const volatile value_type &source) { - update.evdwl += source.evdwl; - update.ecoul += source.ecoul; - update.v[0] += source.v[0]; 
- update.v[1] += source.v[1]; - update.v[2] += source.v[2]; - update.v[3] += source.v[3]; - update.v[4] += source.v[4]; - update.v[5] += source.v[5]; - } - - }; template <class PairStyle, bool STACKPARAMS, class Specialisation> @@ -261,8 +340,8 @@ struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> { template<int EVFLAG, int NEWTON_PAIR> KOKKOS_FUNCTION - EV_FLOAT compute_item(const device_type& dev, - const NeighListKokkos<device_type> &list) const { + EV_FLOAT compute_item(const typename Kokkos::TeamPolicy<device_type>::member_type& dev, + const NeighListKokkos<device_type> &list, const NoCoulTag& ) const { EV_FLOAT ev; const int i = vectorization::global_thread_rank(dev); @@ -302,9 +381,6 @@ struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> { if (c.eflag) { ev.evdwl += 0.5* factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype); - if (c.COUL_FLAG) - ev.ecoul += 0.5* - factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype); } if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz); @@ -373,44 +449,18 @@ struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> { } KOKKOS_INLINE_FUNCTION - void operator()(const device_type& dev) const { - if (c.newton_pair) compute_item<0,1>(dev,list); - else compute_item<0,0>(dev,list); + void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev) const { + if (c.newton_pair) compute_item<0,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type()); + else compute_item<0,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type()); } KOKKOS_INLINE_FUNCTION - void operator()(const device_type& dev, value_type &energy_virial) const { + void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev, value_type &energy_virial) const { if (c.newton_pair) - energy_virial += compute_item<1,1>(dev,list); + energy_virial += compute_item<1,1>(dev,list,typename 
DoCoul<PairStyle::COUL_FLAG>::type()); else - energy_virial += compute_item<1,0>(dev,list); - } - - KOKKOS_INLINE_FUNCTION - static void init(volatile value_type &update) { - update.evdwl = 0; - update.ecoul = 0; - update.v[0] = 0; - update.v[1] = 0; - update.v[2] = 0; - update.v[3] = 0; - update.v[4] = 0; - update.v[5] = 0; - } - KOKKOS_INLINE_FUNCTION - static void join(volatile value_type &update, - const volatile value_type &source) { - update.evdwl += source.evdwl; - update.ecoul += source.ecoul; - update.v[0] += source.v[0]; - update.v[1] += source.v[1]; - update.v[2] += source.v[2]; - update.v[3] += source.v[3]; - update.v[4] += source.v[4]; - update.v[5] += source.v[5]; + energy_virial += compute_item<1,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type()); } - - }; template <class PairStyle, bool STACKPARAMS, class Specialisation> @@ -433,7 +483,8 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> { template<int EVFLAG, int NEWTON_PAIR> KOKKOS_FUNCTION EV_FLOAT compute_item(const int& ii, - const NeighListKokkos<device_type> &list) const { + const NeighListKokkos<device_type> &list, const NoCoulTag&) const { + (void) list; EV_FLOAT ev; const int i = ii;//list.d_ilist[ii]; const X_FLOAT xtmp = c.x(i,0); @@ -470,9 +521,6 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> { if (c.eflag) { ev.evdwl += 0.5* factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype); - if (c.COUL_FLAG) - ev.ecoul += 0.5* - factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype); } if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz); @@ -535,116 +583,156 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> { KOKKOS_INLINE_FUNCTION void operator()(const int i) const { - compute_item<0,0>(i,list); + compute_item<0,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type()); } KOKKOS_INLINE_FUNCTION void operator()(const int i, value_type &energy_virial) const { - 
energy_virial += compute_item<1,0>(i,list); + energy_virial += compute_item<1,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type()); } +}; - KOKKOS_INLINE_FUNCTION - static void init(volatile value_type &update) { - update.evdwl = 0; - update.ecoul = 0; - update.v[0] = 0; - update.v[1] = 0; - update.v[2] = 0; - update.v[3] = 0; - update.v[4] = 0; - update.v[5] = 0; - } - KOKKOS_INLINE_FUNCTION - static void join(volatile value_type &update, - const volatile value_type &source) { - update.evdwl += source.evdwl; - update.ecoul += source.ecoul; - update.v[0] += source.v[0]; - update.v[1] += source.v[1]; - update.v[2] += source.v[2]; - update.v[3] += source.v[3]; - update.v[4] += source.v[4]; - update.v[5] += source.v[5]; - } +// Filter out Neighflags which are not supported for PairStyle +// The enable_if clause will invalidate the last parameter of the function, so that +// a match is only achieved, if PairStyle supports the specific neighborlist variant. +// This uses the fact that failure to match template parameters is not an error. +// By having the enable_if with a ! and without it, exactly one of the two versions of the functions +// pair_compute_neighlist and pair_compute_fullcluster will match - either the dummy version +// or the real one further below. 
+template<class PairStyle, unsigned NEIGHFLAG, class Specialisation> +EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<!(NEIGHFLAG&PairStyle::EnabledNeighFlags), NeighListKokkos<typename PairStyle::device_type>*>::type list) { + EV_FLOAT ev; + (void) fpair; + (void) list; + printf("ERROR: calling pair_compute with invalid neighbor list style: requested %i available %i",NEIGHFLAG,PairStyle::EnabledNeighFlags); + return ev; +} +template<class PairStyle, class Specialisation> +EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<!(FULLCLUSTER&PairStyle::EnabledNeighFlags), NeighListKokkos<typename PairStyle::device_type>*>::type list) { + EV_FLOAT ev; + (void) fpair; + (void) list; + printf("ERROR: calling pair_compute with invalid neighbor list style: requested %i available %i",FULLCLUSTER,PairStyle::EnabledNeighFlags); + return ev; +} -}; +// Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL,N2 +template<class PairStyle, unsigned NEIGHFLAG, class Specialisation> +EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<NEIGHFLAG&PairStyle::EnabledNeighFlags, NeighListKokkos<typename PairStyle::device_type>*>::type list) { + EV_FLOAT ev; + if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) { + PairComputeFunctor<PairStyle,NEIGHFLAG,false,Specialisation > ff(fpair,list); + if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); + else Kokkos::parallel_for(list->inum,ff); + } else { + PairComputeFunctor<PairStyle,NEIGHFLAG,true,Specialisation > ff(fpair,list); + if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); + else Kokkos::parallel_for(list->inum,ff); + } + return ev; +} +// Submit ParallelFor for NEIGHFLAG=FULLCLUSTER template<class PairStyle, class Specialisation> -EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) { +EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, 
typename Kokkos::Impl::enable_if<FULLCLUSTER&PairStyle::EnabledNeighFlags, NeighListKokkos<typename PairStyle::device_type>*>::type list) { EV_FLOAT ev; if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) { - if (fpair->neighflag == FULL) { - PairComputeFunctor<PairStyle,FULL,false,Specialisation > - ff(fpair, list); - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); - else Kokkos::parallel_for(list->inum,ff); - } else if (fpair->neighflag == HALFTHREAD) { - PairComputeFunctor<PairStyle,HALFTHREAD,false,Specialisation > - ff(fpair, list); - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); - else Kokkos::parallel_for(list->inum,ff); - } else if (fpair->neighflag == HALF) { - PairComputeFunctor<PairStyle,HALF,false,Specialisation > - ff(fpair, list); - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); - else Kokkos::parallel_for(list->inum,ff); - } else if (fpair->neighflag == N2) { - PairComputeFunctor<PairStyle,N2,false,Specialisation > - ff(fpair, list); - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(fpair->nlocal,ff,ev); - else Kokkos::parallel_for(fpair->nlocal,ff); - } else if (fpair->neighflag == FULLCLUSTER) { - typedef PairComputeFunctor<PairStyle,FULLCLUSTER,false,Specialisation > - f_type; - f_type ff(fpair, list); - #ifdef KOKKOS_HAVE_CUDA - const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 
256 : 1; - #else - const int teamsize = 1; - #endif - const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize; - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),ff,ev); - else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),ff); - } + typedef PairComputeFunctor<PairStyle,FULLCLUSTER,false,Specialisation > + f_type; + f_type ff(fpair, list); + #ifdef KOKKOS_HAVE_CUDA + const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1; + #else + const int teamsize = 1; + #endif + const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize; + Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize); + if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev); + else Kokkos::parallel_for(config,ff); } else { - if (fpair->neighflag == FULL) { - PairComputeFunctor<PairStyle,FULL,true,Specialisation > - ff(fpair, list); - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); - else Kokkos::parallel_for(list->inum,ff); - } else if (fpair->neighflag == HALFTHREAD) { - PairComputeFunctor<PairStyle,HALFTHREAD,true,Specialisation > - ff(fpair, list); - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); - else Kokkos::parallel_for(list->inum,ff); - } else if (fpair->neighflag == HALF) { - PairComputeFunctor<PairStyle,HALF,true,Specialisation > - ff(fpair, list); - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); - else Kokkos::parallel_for(list->inum,ff); - } else if (fpair->neighflag == N2) { - PairComputeFunctor<PairStyle,N2,true,Specialisation > - ff(fpair, list); - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(fpair->nlocal,ff,ev); - else Kokkos::parallel_for(fpair->nlocal,ff); - } else if (fpair->neighflag == FULLCLUSTER) { - typedef PairComputeFunctor<PairStyle,FULLCLUSTER,true,Specialisation 
> - f_type; - f_type ff(fpair, list); - #ifdef KOKKOS_HAVE_CUDA - const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1; - #else - const int teamsize = 1; - #endif - const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize; - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),ff,ev); - else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),ff); - } + typedef PairComputeFunctor<PairStyle,FULLCLUSTER,true,Specialisation > + f_type; + f_type ff(fpair, list); + #ifdef KOKKOS_HAVE_CUDA + const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1; + #else + const int teamsize = 1; + #endif + const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize; + Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize); + if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev); + else Kokkos::parallel_for(config,ff); } return ev; } + +template<class PairStyle, class Specialisation> +EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) { + EV_FLOAT ev; + if (fpair->neighflag == FULL) { + ev = pair_compute_neighlist<PairStyle,FULL,Specialisation> (fpair,list); + } else if (fpair->neighflag == HALFTHREAD) { + ev = pair_compute_neighlist<PairStyle,HALFTHREAD,Specialisation> (fpair,list); + } else if (fpair->neighflag == HALF) { + ev = pair_compute_neighlist<PairStyle,HALF,Specialisation> (fpair,list); + } else if (fpair->neighflag == N2) { + ev = pair_compute_neighlist<PairStyle,N2,Specialisation> (fpair,list); + } else if (fpair->neighflag == FULLCLUSTER) { + ev = pair_compute_fullcluster<PairStyle,Specialisation> (fpair,list); + } + return ev; +} + +template<class DeviceType> +struct PairVirialFDotRCompute { + typedef ArrayTypes<DeviceType> AT; + typedef EV_FLOAT value_type; + typename 
AT::t_x_array_const x; + typename AT::t_f_array_const f; + const int offset; + + PairVirialFDotRCompute( typename AT::t_x_array_const x_, + typename AT::t_f_array_const f_, + const int offset_):x(x_),f(f_),offset(offset_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int j, value_type &energy_virial) const { + const int i = j + offset; + energy_virial.v[0] += f(i,0)*x(i,0); + energy_virial.v[1] += f(i,1)*x(i,1); + energy_virial.v[2] += f(i,2)*x(i,2); + energy_virial.v[3] += f(i,1)*x(i,0); + energy_virial.v[4] += f(i,2)*x(i,0); + energy_virial.v[5] += f(i,2)*x(i,1); + } +}; + +template<class PairStyle> +void pair_virial_fdotr_compute(PairStyle* fpair) { + EV_FLOAT virial; + if (fpair->neighbor->includegroup == 0) { + int nall = fpair->atom->nlocal + fpair->atom->nghost; + Kokkos::parallel_reduce(nall,PairVirialFDotRCompute<typename PairStyle::device_type>(fpair->x,fpair->f,0),virial); + } else { + Kokkos::parallel_reduce(fpair->atom->nfirst,PairVirialFDotRCompute<typename PairStyle::device_type>(fpair->x,fpair->f,0),virial); + EV_FLOAT virial_ghost; + Kokkos::parallel_reduce(fpair->atom->nghost,PairVirialFDotRCompute<typename PairStyle::device_type>(fpair->x,fpair->f,fpair->atom->nlocal),virial_ghost); + virial+=virial_ghost; + } + fpair->vflag_fdotr = 0; + fpair->virial[0] = virial.v[0]; + fpair->virial[1] = virial.v[1]; + fpair->virial[2] = virial.v[2]; + fpair->virial[3] = virial.v[3]; + fpair->virial[4] = virial.v[4]; + fpair->virial[5] = virial.v[5]; +} + + + + } #endif diff --git a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp new file mode 100644 index 0000000000..97fa91a453 --- /dev/null +++ b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp @@ -0,0 +1,347 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) 
Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_lj_cut_coul_cut_kokkos.h" +#include "kokkos.h" +#include "atom_kokkos.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "update.h" +#include "integrate.h" +#include "respa.h" +#include "math_const.h" +#include "memory.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define KOKKOS_CUDA_MAX_THREADS 256 +#define KOKKOS_CUDA_MIN_BLOCKS 8 + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +PairLJCutCoulCutKokkos<DeviceType>::PairLJCutCoulCutKokkos(LAMMPS *lmp):PairLJCutCoulCut(lmp) +{ + respa_enable = 0; + + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice<DeviceType>::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + cutsq = NULL; + cut_ljsq = NULL; + cut_coulsq = NULL; + +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +PairLJCutCoulCutKokkos<DeviceType>::~PairLJCutCoulCutKokkos() +{ + if (allocated){ + memory->destroy_kokkos(k_cutsq, cutsq); + memory->destroy_kokkos(k_cut_ljsq, cut_ljsq); + memory->destroy_kokkos(k_cut_coulsq, cut_coulsq); + } +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void PairLJCutCoulCutKokkos<DeviceType>::cleanup_copy() { + // WHY needed: this 
prevents parent copy from deallocating any arrays + allocated = 0; + cutsq = NULL; + cut_ljsq = NULL; + cut_coulsq = NULL; + eatom = NULL; + vatom = NULL; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void PairLJCutCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + + double evdwl = 0.0; + double ecoul = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + atomKK->sync(execution_space,datamask_read); + k_cutsq.template sync<DeviceType>(); + k_cut_ljsq.template sync<DeviceType>(); + k_cut_coulsq.template sync<DeviceType>(); + k_params.template sync<DeviceType>(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = atomKK->k_x.view<DeviceType>(); + c_x = atomKK->k_x.view<DeviceType>(); + f = atomKK->k_f.view<DeviceType>(); + q = atomKK->k_q.view<DeviceType>(); + type = atomKK->k_type.view<DeviceType>(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; + special_coul[0] = force->special_coul[0]; + special_coul[1] = force->special_coul[1]; + special_coul[2] = force->special_coul[2]; + special_coul[3] = force->special_coul[3]; + qqrd2e = force->qqrd2e; + newton_pair = force->newton_pair; + + // loop over neighbors of my atoms + + EV_FLOAT ev = pair_compute<PairLJCutCoulCutKokkos<DeviceType>,void > + (this,(NeighListKokkos<DeviceType>*)list); + + DeviceType::fence(); + + if (eflag) { + eng_vdwl += ev.evdwl; + eng_coul += ev.ecoul; + } + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] 
+= ev.v[5]; + } + + if (vflag_fdotr) virial_fdotr_compute(); +} + +/* ---------------------------------------------------------------------- + compute LJ 12-6 pair force between atoms i and j + ---------------------------------------------------------------------- */ +template<class DeviceType> +template<bool STACKPARAMS, class Specialisation> +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCutCoulCutKokkos<DeviceType>:: +compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const { + const F_FLOAT r2inv = 1.0/rsq; + const F_FLOAT r6inv = r2inv*r2inv*r2inv; + F_FLOAT forcelj; + + forcelj = r6inv * + ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv - + (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2)); + + return forcelj*r2inv; +} + +/* ---------------------------------------------------------------------- + compute coulomb pair force between atoms i and j + ---------------------------------------------------------------------- */ +template<class DeviceType> +template<bool STACKPARAMS, class Specialisation> +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCutCoulCutKokkos<DeviceType>:: +compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const { + const F_FLOAT r2inv = 1.0/rsq; + const F_FLOAT rinv = sqrt(r2inv); + F_FLOAT forcecoul; + + forcecoul = qqrd2e*qtmp*q(j) *rinv; + + return factor_coul*forcecoul*r2inv; +} + +/* ---------------------------------------------------------------------- + compute LJ 12-6 pair potential energy between atoms i and j + ---------------------------------------------------------------------- */ +template<class DeviceType> +template<bool STACKPARAMS, class Specialisation> +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCutCoulCutKokkos<DeviceType>:: +compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const { + const F_FLOAT r2inv = 
1.0/rsq; + const F_FLOAT r6inv = r2inv*r2inv*r2inv; + + return r6inv* + ((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv + - (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)) + - (STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset); + +} + +/* ---------------------------------------------------------------------- + compute coulomb pair potential energy between atoms i and j + ---------------------------------------------------------------------- */ +template<class DeviceType> +template<bool STACKPARAMS, class Specialisation> +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCutCoulCutKokkos<DeviceType>:: +compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const { + const F_FLOAT r2inv = 1.0/rsq; + const F_FLOAT rinv = sqrt(r2inv); + + return factor_coul*qqrd2e*qtmp*q(j)*rinv; + +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template<class DeviceType> +void PairLJCutCoulCutKokkos<DeviceType>::allocate() +{ + PairLJCutCoulCut::allocate(); + + int n = atom->ntypes; + memory->destroy(cutsq); + memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + d_cutsq = k_cutsq.template view<DeviceType>(); + memory->destroy(cut_ljsq); + memory->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); + d_cut_ljsq = k_cut_ljsq.template view<DeviceType>(); + memory->destroy(cut_coulsq); + memory->create_kokkos(k_cut_coulsq,cut_coulsq,n+1,n+1,"pair:cut_coulsq"); + d_cut_coulsq = k_cut_coulsq.template view<DeviceType>(); + k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCutCoulCut::params",n+1,n+1); + params = k_params.d_view; +} + +/* ---------------------------------------------------------------------- + global settings 
+------------------------------------------------------------------------- */ + +template<class DeviceType> +void PairLJCutCoulCutKokkos<DeviceType>::settings(int narg, char **arg) +{ + if (narg > 2) error->all(FLERR,"Illegal pair_style command"); + + PairLJCutCoulCut::settings(1,arg); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template<class DeviceType> +void PairLJCutCoulCutKokkos<DeviceType>::init_style() +{ + PairLJCutCoulCut::init_style(); + + // error if rRESPA with inner levels + + if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) { + int respa = 0; + if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; + if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + if (respa) + error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle"); + } + + // irequest = neigh request made by parent class + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value && + !Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value; + + if (neighflag == FULL) { + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full_cluster = 0; + } else if (neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 1; + neighbor->requests[irequest]->full_cluster = 0; + } else if (neighflag == N2) { + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full_cluster = 0; + } else if (neighflag == FULLCLUSTER) { + neighbor->requests[irequest]->full_cluster = 1; + 
neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk"); + } +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +template<class DeviceType> +double PairLJCutCoulCutKokkos<DeviceType>::init_one(int i, int j) +{ + double cutone = PairLJCutCoulCut::init_one(i,j); + double cut_ljsqm = cut_ljsq[i][j]; + double cut_coulsqm = cut_coulsq[i][j]; + + k_params.h_view(i,j).lj1 = lj1[i][j]; + k_params.h_view(i,j).lj2 = lj2[i][j]; + k_params.h_view(i,j).lj3 = lj3[i][j]; + k_params.h_view(i,j).lj4 = lj4[i][j]; + k_params.h_view(i,j).offset = offset[i][j]; + k_params.h_view(i,j).cut_ljsq = cut_ljsqm; + k_params.h_view(i,j).cut_coulsq = cut_coulsqm; + + k_params.h_view(j,i) = k_params.h_view(i,j); + if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) { + m_params[i][j] = m_params[j][i] = k_params.h_view(i,j); + m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone; + m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cut_ljsqm; + m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cut_coulsqm; + } + k_cutsq.h_view(i,j) = cutone*cutone; + k_cutsq.template modify<LMPHostType>(); + k_cut_ljsq.h_view(i,j) = cut_ljsqm; + k_cut_ljsq.template modify<LMPHostType>(); + k_cut_coulsq.h_view(i,j) = cut_coulsqm; + k_cut_coulsq.template modify<LMPHostType>(); + k_params.template modify<LMPHostType>(); + + return cutone; +} + + + +template class PairLJCutCoulCutKokkos<LMPDeviceType>; +#ifdef KOKKOS_HAVE_CUDA +template class PairLJCutCoulCutKokkos<LMPHostType>; +#endif diff --git a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h new file mode 100644 index 0000000000..cc91a5989a --- /dev/null +++ b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h @@ -0,0 +1,131 @@ +/* 
---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/coul/cut/kk,PairLJCutCoulCutKokkos<LMPDeviceType>) +PairStyle(lj/cut/coul/cut/kk/device,PairLJCutCoulCutKokkos<LMPDeviceType>) +PairStyle(lj/cut/coul/cut/kk/host,PairLJCutCoulCutKokkos<LMPHostType>) + +#else + +#ifndef LMP_PAIR_LJ_CUT_COUL_CUT_KOKKOS_H +#define LMP_PAIR_LJ_CUT_COUL_CUT_KOKKOS_H + +#include "pair_kokkos.h" +#include "pair_lj_cut_coul_cut.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template<class DeviceType> +class PairLJCutCoulCutKokkos : public PairLJCutCoulCut { + public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + enum {COUL_FLAG=1}; + typedef DeviceType device_type; + PairLJCutCoulCutKokkos(class LAMMPS *); + ~PairLJCutCoulCutKokkos(); + + void compute(int, int); + + void settings(int, char **); + void init_style(); + double init_one(int, int); + + struct params_lj_coul{ + params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; + params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; + F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset; + }; + + protected: + void cleanup_copy(); + + template<bool STACKPARAMS, class Specialisation> + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template<bool STACKPARAMS, class Specialisation> + 
KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const; + + template<bool STACKPARAMS, class Specialisation> + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template<bool STACKPARAMS, class Specialisation> + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const; + + Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType> k_params; + typename Kokkos::DualView<params_lj_coul**, + Kokkos::LayoutRight,DeviceType>::t_dev_const params; + // hardwired to space for 15 atom types + params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + + F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + typename ArrayTypes<DeviceType>::t_x_array_randomread x; + typename ArrayTypes<DeviceType>::t_x_array c_x; + typename ArrayTypes<DeviceType>::t_f_array f; + typename ArrayTypes<DeviceType>::t_int_1d_randomread type; + typename ArrayTypes<DeviceType>::t_float_1d_randomread q; + typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom; + typename ArrayTypes<DeviceType>::t_virial_array d_vatom; + + int newton_pair; + + typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq; + typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq; + typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_ljsq; + typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_ljsq; + typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_coulsq; + typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_coulsq; + + class AtomKokkos *atomKK; + int neighflag; + int nlocal,nall,eflag,vflag; 
+ + double special_coul[4]; + double special_lj[4]; + double qqrd2e; + + void allocate(); + friend class PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,true>; + friend class PairComputeFunctor<PairLJCutCoulCutKokkos,HALF,true>; + friend class PairComputeFunctor<PairLJCutCoulCutKokkos,HALFTHREAD,true>; + friend class PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,false>; + friend class PairComputeFunctor<PairLJCutCoulCutKokkos,HALF,false>; + friend class PairComputeFunctor<PairLJCutCoulCutKokkos,HALFTHREAD,false>; + friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,FULL,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,HALF,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,HALFTHREAD,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute<PairLJCutCoulCutKokkos,void>(PairLJCutCoulCutKokkos*, + NeighListKokkos<DeviceType>*); + +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp new file mode 100644 index 0000000000..5c4b834bee --- /dev/null +++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp @@ -0,0 +1,496 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_lj_cut_coul_long_kokkos.h" +#include "kokkos.h" +#include "atom_kokkos.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "update.h" +#include "integrate.h" +#include "respa.h" +#include "math_const.h" +#include "memory.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define KOKKOS_CUDA_MAX_THREADS 256 +#define KOKKOS_CUDA_MIN_BLOCKS 8 + + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +PairLJCutCoulLongKokkos<DeviceType>::PairLJCutCoulLongKokkos(LAMMPS *lmp):PairLJCutCoulLong(lmp) +{ /* records the execution space for DeviceType and the atom-data masks that compute() hands to atomKK->sync() and atomKK->modified() */ + respa_enable = 0; + + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice<DeviceType>::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + cutsq = NULL; + cut_ljsq = NULL; + cut_coulsq = 0.0; /* cut_coulsq is a scalar here (init_one copies it into a double), so it is zeroed rather than assigned NULL */ + +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +PairLJCutCoulLongKokkos<DeviceType>::~PairLJCutCoulLongKokkos() +{ /* hands the cutsq and cut_ljsq arrays, created with their DualViews in allocate(), back to memory->destroy_kokkos */ + if (allocated){ + memory->destroy_kokkos(k_cutsq, cutsq); + memory->destroy_kokkos(k_cut_ljsq, cut_ljsq); + } +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void PairLJCutCoulLongKokkos<DeviceType>::cleanup_copy() { + // WHY needed: this prevents parent copy from deallocating any arrays + allocated = 0; + cutsq = NULL; + cut_ljsq = NULL; + eatom = NULL; + vatom = NULL; + ftable = 
NULL; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in) +{ /* per-call force evaluation: syncs atom data and coefficient views to DeviceType, runs pair_compute, then adds the returned energies and virial to the accumulators */ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + + double evdwl = 0.0; + double ecoul = 0.0; /* NOTE(review): evdwl and ecoul are not read anywhere else in this function */ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + atomKK->sync(execution_space,datamask_read); + k_cutsq.template sync<DeviceType>(); + k_cut_ljsq.template sync<DeviceType>(); + k_cut_coulsq.template sync<DeviceType>(); + k_params.template sync<DeviceType>(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = atomKK->k_x.view<DeviceType>(); + c_x = atomKK->k_x.view<DeviceType>(); + f = atomKK->k_f.view<DeviceType>(); + q = atomKK->k_q.view<DeviceType>(); + type = atomKK->k_type.view<DeviceType>(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; + special_coul[0] = force->special_coul[0]; + special_coul[1] = force->special_coul[1]; + special_coul[2] = force->special_coul[2]; + special_coul[3] = force->special_coul[3]; + qqrd2e = force->qqrd2e; + newton_pair = force->newton_pair; /* below, CoulLongTable<1> is selected only when ncoultablebits is nonzero, CoulLongTable<0> otherwise */ + + // loop over neighbors of my atoms + + EV_FLOAT ev; + if(ncoultablebits) + ev = pair_compute<PairLJCutCoulLongKokkos<DeviceType>,CoulLongTable<1> > + (this,(NeighListKokkos<DeviceType>*)list); + else + ev = pair_compute<PairLJCutCoulLongKokkos<DeviceType>,CoulLongTable<0> > + (this,(NeighListKokkos<DeviceType>*)list); + + + DeviceType::fence(); + + if (eflag) { + eng_vdwl += ev.evdwl; + eng_coul += ev.ecoul; + } + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += 
ev.v[4]; + virial[5] += ev.v[5]; + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); +} + +/* ---------------------------------------------------------------------- + compute LJ 12-6 pair force between atoms i and j: returns r6inv*(lj1*r6inv - lj2)*r2inv; STACKPARAMS reads m_params instead of the params view + ---------------------------------------------------------------------- */ +template<class DeviceType> +template<bool STACKPARAMS, class Specialisation> +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCutCoulLongKokkos<DeviceType>:: +compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const { + const F_FLOAT r2inv = 1.0/rsq; + const F_FLOAT r6inv = r2inv*r2inv*r2inv; + F_FLOAT forcelj; + + forcelj = r6inv * + ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv - + (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2)); + + return forcelj*r2inv; +} + +/* ---------------------------------------------------------------------- + compute coulomb pair force between atoms i and j: table lookup when Specialisation::DoTable is set and rsq > tabinnersq, otherwise the closed-form erfc branch; factor_coul < 1 subtracts (1-factor_coul)*prefactor + ---------------------------------------------------------------------- */ +template<class DeviceType> +template<bool STACKPARAMS, class Specialisation> +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCutCoulLongKokkos<DeviceType>:: +compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const { + if(Specialisation::DoTable && rsq > tabinnersq) { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits; + const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable]; + const F_FLOAT table = d_ftable[itable] + fraction*d_dftable[itable]; + F_FLOAT forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable]; + const F_FLOAT prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + return forcecoul/rsq; + } else { + const F_FLOAT r = sqrt(rsq); + const F_FLOAT 
grij = g_ewald * r; + const F_FLOAT expm2 = exp(-grij*grij); + const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij); + const F_FLOAT rinv = 1.0/r; + const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const F_FLOAT prefactor = qqrd2e * qtmp*q[j]*rinv; + F_FLOAT forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + + return forcecoul*rinv*rinv; + } +} + +/* ---------------------------------------------------------------------- + compute LJ 12-6 pair potential energy between atoms i and j + ---------------------------------------------------------------------- */ +template<class DeviceType> +template<bool STACKPARAMS, class Specialisation> +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCutCoulLongKokkos<DeviceType>:: +compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const { + const F_FLOAT r2inv = 1.0/rsq; + const F_FLOAT r6inv = r2inv*r2inv*r2inv; + + return r6inv* + ((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv + - (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)) + - (STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset); + +} + +/* ---------------------------------------------------------------------- + compute coulomb pair potential energy between atoms i and j + ---------------------------------------------------------------------- */ +template<class DeviceType> +template<bool STACKPARAMS, class Specialisation> +KOKKOS_INLINE_FUNCTION +F_FLOAT PairLJCutCoulLongKokkos<DeviceType>:: +compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const { + if(Specialisation::DoTable && rsq > tabinnersq) { /* same split as compute_fcoul: the tables are only indexed for rsq > tabinnersq, shorter distances use the erfc branch below */ + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits; + const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable]; + const F_FLOAT table = 
d_etable[itable] + fraction*d_detable[itable]; + F_FLOAT ecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable]; + const F_FLOAT prefactor = qtmp*q[j] * table; + ecoul -= (1.0-factor_coul)*prefactor; + } + return ecoul; + } else { + const F_FLOAT r = sqrt(rsq); + const F_FLOAT grij = g_ewald * r; + const F_FLOAT expm2 = exp(-grij*grij); + const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij); + const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const F_FLOAT prefactor = qqrd2e * qtmp*q[j]/r; + F_FLOAT ecoul = prefactor * erfc; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + return ecoul; + } +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template<class DeviceType> +void PairLJCutCoulLongKokkos<DeviceType>::allocate() +{ + PairLJCutCoulLong::allocate(); /* parent allocates first; cutsq and cut_ljsq are then destroyed and re-created through create_kokkos together with k_cutsq and k_cut_ljsq */ + + int n = atom->ntypes; + memory->destroy(cutsq); + memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + d_cutsq = k_cutsq.template view<DeviceType>(); + memory->destroy(cut_ljsq); + memory->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); + d_cut_ljsq = k_cut_ljsq.template view<DeviceType>(); + + memory->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); + d_cut_coulsq = k_cut_coulsq.template view<DeviceType>(); + k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCutCoulLong::params",n+1,n+1); + params = k_params.d_view; +} + +template<class DeviceType> +void PairLJCutCoulLongKokkos<DeviceType>::init_tables(double cut_coul, double *cut_respa) +{ + Pair::init_tables(cut_coul,cut_respa); /* the host tables read below (rtable, ftable, ...) are expected to be populated by this call */ + + typedef typename ArrayTypes<DeviceType>::t_ffloat_1d table_type; + typedef typename ArrayTypes<LMPHostType>::t_ffloat_1d host_table_type; + + int ntable = 1; + for (int i = 0; i < ncoultablebits; i++) ntable *= 2; /* ntable = 2^ncoultablebits entries per table */ + + + // Copy rtable and drtable + 
{ /* each scoped block below stages one host table in h_table, deep_copies it into d_table and stores that view in the matching d_* member; NOTE(review): the eight blocks differ only in source array and destination member */ + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + for(int i = 0; i < ntable; i++) { + h_table(i) = rtable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_rtable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + for(int i = 0; i < ntable; i++) { + h_table(i) = drtable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_drtable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + // Copy ftable and dftable + for(int i = 0; i < ntable; i++) { + h_table(i) = ftable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_ftable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + for(int i = 0; i < ntable; i++) { + h_table(i) = dftable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_dftable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + // Copy ctable and dctable + for(int i = 0; i < ntable; i++) { + h_table(i) = ctable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_ctable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + for(int i = 0; i < ntable; i++) { + h_table(i) = dctable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_dctable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + // Copy etable and detable + for(int i = 0; i < ntable; i++) { + h_table(i) = etable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_etable = d_table; + } + + { + host_table_type h_table("HostTable",ntable); + table_type d_table("DeviceTable",ntable); + + for(int i = 0; i < ntable; i++) { + h_table(i) = detable[i]; + } + Kokkos::deep_copy(d_table,h_table); + d_detable = d_table; + } +} + + +/* 
---------------------------------------------------------------------- + global settings: more than two arguments is an error, then the parent settings() is applied +------------------------------------------------------------------------- */ + +template<class DeviceType> +void PairLJCutCoulLongKokkos<DeviceType>::settings(int narg, char **arg) +{ + if (narg > 2) error->all(FLERR,"Illegal pair_style command"); + + PairLJCutCoulLong::settings(narg,arg); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style: rejects rRESPA inner/middle levels and sets the last neighbor request to full or half according to lmp->kokkos->neighflag +------------------------------------------------------------------------- */ + +template<class DeviceType> +void PairLJCutCoulLongKokkos<DeviceType>::init_style() +{ + PairLJCutCoulLong::init_style(); + + // error if rRESPA with inner levels + + if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) { + int respa = 0; + if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; + if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + if (respa) + error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle"); + } + + // irequest = neigh request made by parent class + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value && + !Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value; + + if (neighflag == FULL) { + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full_cluster = 0; + } else if (neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 1; + neighbor->requests[irequest]->full_cluster = 0; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/long/kk"); + } +} + +/* 
---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +template<class DeviceType> +double PairLJCutCoulLongKokkos<DeviceType>::init_one(int i, int j) +{ + double cutone = PairLJCutCoulLong::init_one(i,j); /* parent supplies the pair cutoff that is returned at the end */ + double cut_ljsqm = cut_ljsq[i][j]; + double cut_coulsqm = cut_coulsq; /* single scalar value stored for every type pair */ + + k_params.h_view(i,j).lj1 = lj1[i][j]; + k_params.h_view(i,j).lj2 = lj2[i][j]; + k_params.h_view(i,j).lj3 = lj3[i][j]; + k_params.h_view(i,j).lj4 = lj4[i][j]; + k_params.h_view(i,j).offset = offset[i][j]; + k_params.h_view(i,j).cut_ljsq = cut_ljsqm; + k_params.h_view(i,j).cut_coulsq = cut_coulsqm; + + k_params.h_view(j,i) = k_params.h_view(i,j); + if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) { + m_params[i][j] = m_params[j][i] = k_params.h_view(i,j); + m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone; + m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cut_ljsqm; + m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cut_coulsqm; + } + + k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone; /* the three cutoff views are written for (i,j) and (j,i), like k_params and the m_* tables above, so a lookup with swapped types sees the same value */ + k_cutsq.template modify<LMPHostType>(); + k_cut_ljsq.h_view(i,j) = k_cut_ljsq.h_view(j,i) = cut_ljsqm; + k_cut_ljsq.template modify<LMPHostType>(); + k_cut_coulsq.h_view(i,j) = k_cut_coulsq.h_view(j,i) = cut_coulsqm; /* k_cut_coulsq has no host array aliased to it in allocate(), so this is the only visible place its (j,i) entry gets set */ + k_cut_coulsq.template modify<LMPHostType>(); + k_params.template modify<LMPHostType>(); + + return cutone; +} + + + +template class PairLJCutCoulLongKokkos<LMPDeviceType>; +#ifdef KOKKOS_HAVE_CUDA +template class PairLJCutCoulLongKokkos<LMPHostType>; +#endif diff --git a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h new file mode 100644 index 0000000000..d6512174f1 --- /dev/null +++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h @@ -0,0 +1,147 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + 
Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/coul/long/kk,PairLJCutCoulLongKokkos<LMPDeviceType>) +PairStyle(lj/cut/coul/long/kk/device,PairLJCutCoulLongKokkos<LMPDeviceType>) +PairStyle(lj/cut/coul/long/kk/host,PairLJCutCoulLongKokkos<LMPHostType>) + +#else + +#ifndef LMP_PAIR_LJ_CUT_COUL_LONG_KOKKOS_H +#define LMP_PAIR_LJ_CUT_COUL_LONG_KOKKOS_H + +#include "pair_kokkos.h" +#include "pair_lj_cut_coul_long.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template<class DeviceType> +class PairLJCutCoulLongKokkos : public PairLJCutCoulLong { /* Kokkos variant of PairLJCutCoulLong, templated on the device type; registered above as lj/cut/coul/long/kk, /kk/device and /kk/host */ + public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + enum {COUL_FLAG=1}; + typedef DeviceType device_type; + PairLJCutCoulLongKokkos(class LAMMPS *); + ~PairLJCutCoulLongKokkos(); + + void compute(int, int); + + void settings(int, char **); + void init_tables(double cut_coul, double *cut_respa); + void init_style(); + double init_one(int, int); + + struct params_lj_coul{ /* per-type-pair coefficients written by init_one(); both constructors zero every field */ + params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; + params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; + F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset; + }; + + protected: + void cleanup_copy(); + + template<bool STACKPARAMS, class Specialisation> + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template<bool STACKPARAMS, class Specialisation> + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, + const int& jtype, const F_FLOAT& factor_coul, const 
F_FLOAT& qtmp) const; + + template<bool STACKPARAMS, class Specialisation> + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template<bool STACKPARAMS, class Specialisation> + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const; + + Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType> k_params; /* host/device pair of the per-type-pair coefficients written by init_one(); params (next) is assigned k_params.d_view in allocate() */ + typename Kokkos::DualView<params_lj_coul**, + Kokkos::LayoutRight,DeviceType>::t_dev_const params; + // hardwired to space for 15 atom types + params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + + F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + typename ArrayTypes<DeviceType>::t_x_array_randomread x; + typename ArrayTypes<DeviceType>::t_x_array c_x; + typename ArrayTypes<DeviceType>::t_f_array f; + typename ArrayTypes<DeviceType>::t_int_1d_randomread type; + typename ArrayTypes<DeviceType>::t_float_1d_randomread q; + typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom; + typename ArrayTypes<DeviceType>::t_virial_array d_vatom; + + int newton_pair; + + typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq; + typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq; + typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_ljsq; + typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_ljsq; + typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_coulsq; + typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_coulsq; + + typename ArrayTypes<DeviceType>::t_ffloat_1d_randomread + d_rtable, d_drtable, d_ftable, d_dftable, + d_ctable, d_dctable, d_etable, d_detable; + class AtomKokkos *atomKK; + int neighflag; + int nlocal,nall,eflag,vflag; + + double 
special_coul[4]; + double special_lj[4]; + double qqrd2e; + + void allocate(); /* friends below: the PairComputeFunctor, pair_compute_neighlist and pair_compute instantiations for CoulLongTable<1> and CoulLongTable<0>, plus pair_virial_fdotr_compute */ + friend class PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,CoulLongTable<1> >; + friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,true,CoulLongTable<1> >; + friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >; + friend class PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,CoulLongTable<1> >; + friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,false,CoulLongTable<1> >; + friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >; + friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALF,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute<PairLJCutCoulLongKokkos,CoulLongTable<1> >(PairLJCutCoulLongKokkos*, + NeighListKokkos<DeviceType>*); + friend class PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,CoulLongTable<0> >; + friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,true,CoulLongTable<0> >; + friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >; + friend class PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,CoulLongTable<0> >; + friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,false,CoulLongTable<0> >; + friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >; + friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALF,CoulLongTable<0> 
>(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute<PairLJCutCoulLongKokkos,CoulLongTable<0> >(PairLJCutCoulLongKokkos*, + NeighListKokkos<DeviceType>*); + friend void pair_virial_fdotr_compute<PairLJCutCoulLongKokkos>(PairLJCutCoulLongKokkos*); + +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/pair_lj_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_kokkos.cpp index 94576a36c7..c69fbe89f5 100644 --- a/src/KOKKOS/pair_lj_cut_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_kokkos.cpp @@ -99,18 +99,18 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in) c_x = atomKK->k_x.view<DeviceType>(); f = atomKK->k_f.view<DeviceType>(); type = atomKK->k_type.view<DeviceType>(); + tag = atomKK->k_tag.view<DeviceType>(); nlocal = atom->nlocal; nall = atom->nlocal + atom->nghost; + newton_pair = force->newton_pair; special_lj[0] = force->special_lj[0]; special_lj[1] = force->special_lj[1]; special_lj[2] = force->special_lj[2]; special_lj[3] = force->special_lj[3]; - newton_pair = force->newton_pair; // loop over neighbors of my atoms EV_FLOAT ev = pair_compute<PairLJCutKokkos<DeviceType>,void >(this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); if (eflag) eng_vdwl += ev.evdwl; @@ -123,7 +123,7 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in) virial[5] += ev.v[5]; } - if (vflag_fdotr) virial_fdotr_compute(); + if (vflag_fdotr) pair_virial_fdotr_compute(this); } template<class DeviceType> @@ -131,12 +131,15 @@ template<bool STACKPARAMS, class Specialisation> KOKKOS_INLINE_FUNCTION F_FLOAT PairLJCutKokkos<DeviceType>:: compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const { + (void) i; + (void) j; const F_FLOAT r2inv = 1.0/rsq; const F_FLOAT r6inv = r2inv*r2inv*r2inv; 
const F_FLOAT forcelj = r6inv * ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv - (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2)); + return forcelj*r2inv; } @@ -145,8 +148,11 @@ template<bool STACKPARAMS, class Specialisation> KOKKOS_INLINE_FUNCTION F_FLOAT PairLJCutKokkos<DeviceType>:: compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const { + (void) i; + (void) j; const F_FLOAT r2inv = 1.0/rsq; const F_FLOAT r6inv = r2inv*r2inv*r2inv; + return r6inv*((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv - (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)) - (STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset); @@ -262,6 +268,6 @@ double PairLJCutKokkos<DeviceType>::init_one(int i, int j) template class PairLJCutKokkos<LMPDeviceType>; -#if DEVICE==2 +#ifdef KOKKOS_HAVE_CUDA template class PairLJCutKokkos<LMPHostType>; #endif diff --git a/src/KOKKOS/pair_lj_cut_kokkos.h b/src/KOKKOS/pair_lj_cut_kokkos.h index 5c3c002af5..0ba40b4d54 100644 --- a/src/KOKKOS/pair_lj_cut_kokkos.h +++ b/src/KOKKOS/pair_lj_cut_kokkos.h @@ -31,6 +31,7 @@ namespace LAMMPS_NS { template<class DeviceType> class PairLJCutKokkos : public PairLJCut { public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER}; enum {COUL_FLAG=0}; typedef DeviceType device_type; PairLJCutKokkos(class LAMMPS *); @@ -76,9 +77,10 @@ class PairLJCutKokkos : public PairLJCut { typename ArrayTypes<DeviceType>::t_int_1d_randomread type; typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom; typename ArrayTypes<DeviceType>::t_virial_array d_vatom; - //typename ArrayTypes<DeviceType>::t_ffloat_1d special_lj; + typename ArrayTypes<DeviceType>::t_tagint_1d tag; int newton_pair; + double special_lj[4]; typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq; typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq; @@ -98,8 +100,13 @@ class PairLJCutKokkos : public PairLJCut { 
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,false>; friend class PairComputeFunctor<PairLJCutKokkos,N2,false>; friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,false >; + friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,FULL,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALF,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALFTHREAD,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,N2,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute_fullcluster<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*); friend EV_FLOAT pair_compute<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*); - + friend void pair_virial_fdotr_compute<PairLJCutKokkos>(PairLJCutKokkos*); }; } diff --git a/src/KOKKOS/pair_table_kokkos.cpp b/src/KOKKOS/pair_table_kokkos.cpp index cc8072991a..b4cc32adf5 100644 --- a/src/KOKKOS/pair_table_kokkos.cpp +++ b/src/KOKKOS/pair_table_kokkos.cpp @@ -153,8 +153,9 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in) const int teamsize = 1; #endif const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize; - if (eflag || vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),f,ev); - else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),f); + Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize); + if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev); + else Kokkos::parallel_for(config,f); } } else { if (neighflag == FULL) { @@ -187,8 +188,9 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in) const int teamsize = 1; #endif const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize; - if (eflag || vflag) 
Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),f,ev); - else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),f); + Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize); + if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev); + else Kokkos::parallel_for(config,f); } } DeviceType::fence(); @@ -203,7 +205,7 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in) virial[5] += ev.v[5]; } - if (vflag_fdotr) virial_fdotr_compute(); + if (vflag_fdotr) pair_virial_fdotr_compute(this); } template<class DeviceType> @@ -211,6 +213,8 @@ template<bool STACKPARAMS, class Specialisation> KOKKOS_INLINE_FUNCTION F_FLOAT PairTableKokkos<DeviceType>:: compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const { + (void) i; + (void) j; union_int_float_t rsq_lookup; double fpair; const int tidx = d_table_const.tabindex(itype,jtype); @@ -254,6 +258,8 @@ template<bool STACKPARAMS, class Specialisation> KOKKOS_INLINE_FUNCTION F_FLOAT PairTableKokkos<DeviceType>:: compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const { + (void) i; + (void) j; double evdwl; union_int_float_t rsq_lookup; const int tidx = d_table_const.tabindex(itype,jtype); @@ -292,128 +298,6 @@ compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, c return evdwl; } -/* -template<class DeviceType> -template<int EVFLAG, int NEIGHFLAG, int NEWTON_PAIR,int TABSTYLE> -KOKKOS_FUNCTION -EV_FLOAT PairTableKokkos<DeviceType>:: -compute_item(const int &ii, const NeighListKokkos<DeviceType> &list) const -{ - EV_FLOAT ev; - const int tlm1 = tablength - 1; - union_int_float_t rsq_lookup; - const int i = list.d_ilist[ii]; - const X_FLOAT xtmp = x(i,0); - const X_FLOAT ytmp = x(i,1); - const X_FLOAT ztmp = x(i,2); - const int itype = type(i); - - const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); - const int jnum = list.d_numneigh[i]; - - 
F_FLOAT fxtmp = 0.0; - F_FLOAT fytmp = 0.0; - F_FLOAT fztmp = 0.0; - - for (int jj = 0; jj < jnum; jj++) { - int j = neighbors_i(jj); - const F_FLOAT factor_lj = 1.0; //special_lj[sbmask(j)]; - j &= NEIGHMASK; - const X_FLOAT delx = xtmp - x(j,0); - const X_FLOAT dely = ytmp - x(j,1); - const X_FLOAT delz = ztmp - x(j,2); - const int jtype = type(j); - const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; - - if (rsq < d_table_const.cutsq(itype,jtype)) { - double fpair; - const int tidx = d_table_const.tabindex(itype,jtype); - //const Table* const tb = &tables[tabindex[itype][jtype]]; - - //if (rsq < d_table_const.innersq(tidx)) - // error->one(FLERR,"Pair distance < table inner cutoff"); - - if (TABSTYLE == LOOKUP) { - const int itable = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); - //if (itable >= tlm1) - // error->one(FLERR,"Pair distance > table outer cutoff"); - fpair = factor_lj * d_table_const.f(tidx,itable); - if (EVFLAG) - ev.evdwl = d_table_const.e(tidx,itable); - } else if (TABSTYLE == LINEAR) { - const int itable = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); - //if (itable >= tlm1) - // error->one(FLERR,"Pair distance > table outer cutoff"); - const double fraction = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); - const double value = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable); - fpair = factor_lj * value; - if (EVFLAG) - ev.evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable); - } else if (TABSTYLE == SPLINE) { - const int itable = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); - //if (itable >= tlm1) - // error->one(FLERR,"Pair distance > table outer cutoff"); - const double b = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); - const double a = 1.0 - b; - const double value = a * d_table_const.f(tidx,itable) + b * 
d_table_const.f(tidx,itable+1) + - ((a*a*a-a)*d_table_const.f2(tidx,itable) + (b*b*b-b)*d_table_const.f2(tidx,itable+1)) * - d_table_const.deltasq6(tidx); - fpair = factor_lj * value; - if (EVFLAG) - ev.evdwl = a * d_table_const.e(tidx,itable) + b * d_table_const.e(tidx,itable+1) + - ((a*a*a-a)*d_table_const.e2(tidx,itable) + (b*b*b-b)*d_table_const.e2(tidx,itable+1)) * - d_table_const.deltasq6(tidx); - } else { - rsq_lookup.f = rsq; - int itable = rsq_lookup.i & d_table_const.nmask(tidx); - itable >>= d_table_const.nshiftbits(tidx); - const double fraction = (rsq_lookup.f - d_table_const.rsq(tidx,itable)) * d_table_const.drsq(tidx,itable); - const double value = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable); - fpair = factor_lj * value; - if (EVFLAG) - ev.evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable); - } - - fxtmp += delx*fpair; - fytmp += dely*fpair; - fztmp += delz*fpair; - if ((NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { - Kokkos::atomic_fetch_add(&f(j,0),-delx*fpair); - Kokkos::atomic_fetch_add(&f(j,1),-dely*fpair); - Kokkos::atomic_fetch_add(&f(j,2),-delz*fpair); - } - - if ((NEIGHFLAG==HALF) && (NEWTON_PAIR || j < nlocal)) { - f(j,0) -= delx*fpair; - f(j,1) -= dely*fpair; - f(j,2) -= delz*fpair; - } - - if(EVFLAG) { - if (eflag) { - ev.evdwl *= factor_lj; - } - - if (evflag) ev_tally<NEIGHFLAG>(ev,i,j -,fpair,delx,dely,delz); - } - } - } - - if (NEIGHFLAG == HALFTHREAD) { - Kokkos::atomic_fetch_add(&f(i,0),fxtmp); - Kokkos::atomic_fetch_add(&f(i,1),fytmp); - Kokkos::atomic_fetch_add(&f(i,2),fztmp); - } else { - f(i,0) += fxtmp; - f(i,1) += fytmp; - f(i,2) += fztmp; - } - - return ev; -} -*/ - template<class DeviceType> void PairTableKokkos<DeviceType>::create_kokkos_tables() { @@ -880,7 +764,6 @@ void PairTableKokkos<DeviceType>::param_extract(Table *tb, char *line) word = strtok(NULL," \t\n\r\f"); tb->fphi = atof(word); } else { - printf("WORD: %s\n",word); error->one(FLERR,"Invalid 
keyword in pair table parameters"); } word = strtok(NULL," \t\n\r\f"); @@ -1494,7 +1377,7 @@ void PairTableKokkos<DeviceType>::cleanup_copy() { } template class PairTableKokkos<LMPDeviceType>; -#if DEVICE==2 +#ifdef KOKKOS_HAVE_CUDA template class PairTableKokkos<LMPHostType>; #endif diff --git a/src/KOKKOS/pair_table_kokkos.h b/src/KOKKOS/pair_table_kokkos.h index 317703c895..10c7c6db7b 100644 --- a/src/KOKKOS/pair_table_kokkos.h +++ b/src/KOKKOS/pair_table_kokkos.h @@ -41,6 +41,7 @@ template<class DeviceType> class PairTableKokkos : public Pair { public: + enum {EnabledNeighFlags=FULL&HALFTHREAD&HALF&N2&FULLCLUSTER}; enum {COUL_FLAG=0}; typedef DeviceType device_type; @@ -208,67 +209,10 @@ class PairTableKokkos : public Pair { friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,BITMAP> >; friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,BITMAP> >; friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,BITMAP> >; -/*template<int FULL_NEIGH> - KOKKOS_INLINE_FUNCTION - void ev_tally(EV_FLOAT &ev, const int &i, const int &j, - const F_FLOAT &fpair, const F_FLOAT &delx, - const F_FLOAT &dely, const F_FLOAT &delz) const; -*/ -}; -/* -template <class DeviceType, int NEIGHFLAG, int TABSTYLE> -struct PairTableComputeFunctor { - typedef DeviceType device_type ; - typedef EV_FLOAT value_type; - - PairTableKokkos<DeviceType> c; - NeighListKokkos<DeviceType> list; - - PairTableComputeFunctor(PairTableKokkos<DeviceType>* c_ptr, - NeighListKokkos<DeviceType>* list_ptr): - c(*c_ptr),list(*list_ptr) {}; - ~PairTableComputeFunctor() {c.cleanup_copy();list.clean_copy();}; - - KOKKOS_INLINE_FUNCTION - void operator()(const int i) const { - if (c.newton_pair) c.template compute_item<0,NEIGHFLAG,1,TABSTYLE>(i,list); - else c.template compute_item<0,NEIGHFLAG,0,TABSTYLE>(i,list); - } - KOKKOS_INLINE_FUNCTION - void operator()(const int i, value_type &energy_virial) const { - if 
(c.newton_pair) - energy_virial += c.template compute_item<1,NEIGHFLAG,1,TABSTYLE>(i,list); - else - energy_virial += c.template compute_item<1,NEIGHFLAG,0,TABSTYLE>(i,list); - } - - KOKKOS_INLINE_FUNCTION - static void init(volatile value_type &update) { - update.evdwl = 0; - update.ecoul = 0; - update.v[0] = 0; - update.v[1] = 0; - update.v[2] = 0; - update.v[3] = 0; - update.v[4] = 0; - update.v[5] = 0; - } - KOKKOS_INLINE_FUNCTION - static void join(volatile value_type &update, - const volatile value_type &source) { - update.evdwl += source.evdwl; - update.ecoul += source.ecoul; - update.v[0] += source.v[0]; - update.v[1] += source.v[1]; - update.v[2] += source.v[2]; - update.v[3] += source.v[3]; - update.v[4] += source.v[4]; - update.v[5] += source.v[5]; - } + friend void pair_virial_fdotr_compute<PairTableKokkos>(PairTableKokkos*); }; -*/ diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index 5f5d1542bd..1e7475dc49 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -53,6 +53,7 @@ VerletKokkos::VerletKokkos(LAMMPS *lmp, int narg, char **arg) : void VerletKokkos::setup() { + if (comm->me == 0 && screen) fprintf(screen,"Setting up run ...\n"); update->setupflag = 1; @@ -65,19 +66,24 @@ void VerletKokkos::setup() atomKK->setup(); modify->setup_pre_exchange(); // debug - atomKK->sync(Host,ALL_MASK); - atomKK->modified(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); + atomKK->modified(Host,ALL_MASK); if (triclinic) domain->x2lamda(atomKK->nlocal); domain->pbc(); atomKK->sync(Host,ALL_MASK); + domain->reset_box(); comm->setup(); if (neighbor->style) neighbor->setup_bins(); + comm->exchange(); + if (atomKK->sortfreq > 0) atomKK->sort(); + comm->borders(); + if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost); atomKK->sync(Host,ALL_MASK); @@ -97,20 +103,47 @@ void VerletKokkos::setup() force_clear(); modify->setup_pre_force(vflag); - if (pair_compute_flag) force->pair->compute(eflag,vflag); + if 
(pair_compute_flag) { + atomKK->sync(force->pair->execution_space,force->pair->datamask_read); + atomKK->modified(force->pair->execution_space,force->pair->datamask_modify); + force->pair->compute(eflag,vflag); + timer->stamp(TIME_PAIR); + } else if (force->pair) force->pair->compute_dummy(eflag,vflag); + if (atomKK->molecular) { - if (force->bond) force->bond->compute(eflag,vflag); - if (force->angle) force->angle->compute(eflag,vflag); - if (force->dihedral) force->dihedral->compute(eflag,vflag); - if (force->improper) force->improper->compute(eflag,vflag); + if (force->bond) { + atomKK->sync(force->bond->execution_space,force->bond->datamask_read); + atomKK->modified(force->bond->execution_space,force->bond->datamask_modify); + force->bond->compute(eflag,vflag); + } + if (force->angle) { + atomKK->sync(force->angle->execution_space,force->angle->datamask_read); + atomKK->modified(force->angle->execution_space,force->angle->datamask_modify); + force->angle->compute(eflag,vflag); + } + if (force->dihedral) { + atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read); + atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify); + force->dihedral->compute(eflag,vflag); + } + if (force->improper) { + atomKK->sync(force->improper->execution_space,force->improper->datamask_read); + atomKK->modified(force->improper->execution_space,force->improper->datamask_modify); + force->improper->compute(eflag,vflag); + } + timer->stamp(TIME_BOND); } - if (force->kspace) { + if(force->kspace) { force->kspace->setup(); - if (kspace_compute_flag) force->kspace->compute(eflag,vflag); - else force->kspace->compute_dummy(eflag,vflag); + if (kspace_compute_flag) { + atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read); + atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify); + force->kspace->compute(eflag,vflag); + timer->stamp(TIME_KSPACE); + } else force->kspace->compute_dummy(eflag,vflag); } 
if (force->newton) comm->reverse_comm(); @@ -172,20 +205,47 @@ void VerletKokkos::setup_minimal(int flag) force_clear(); modify->setup_pre_force(vflag); - if (pair_compute_flag) force->pair->compute(eflag,vflag); + if (pair_compute_flag) { + atomKK->sync(force->pair->execution_space,force->pair->datamask_read); + atomKK->modified(force->pair->execution_space,force->pair->datamask_modify); + force->pair->compute(eflag,vflag); + timer->stamp(TIME_PAIR); + } else if (force->pair) force->pair->compute_dummy(eflag,vflag); + if (atomKK->molecular) { - if (force->bond) force->bond->compute(eflag,vflag); - if (force->angle) force->angle->compute(eflag,vflag); - if (force->dihedral) force->dihedral->compute(eflag,vflag); - if (force->improper) force->improper->compute(eflag,vflag); + if (force->bond) { + atomKK->sync(force->bond->execution_space,force->bond->datamask_read); + atomKK->modified(force->bond->execution_space,force->bond->datamask_modify); + force->bond->compute(eflag,vflag); + } + if (force->angle) { + atomKK->sync(force->angle->execution_space,force->angle->datamask_read); + atomKK->modified(force->angle->execution_space,force->angle->datamask_modify); + force->angle->compute(eflag,vflag); + } + if (force->dihedral) { + atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read); + atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify); + force->dihedral->compute(eflag,vflag); + } + if (force->improper) { + atomKK->sync(force->improper->execution_space,force->improper->datamask_read); + atomKK->modified(force->improper->execution_space,force->improper->datamask_modify); + force->improper->compute(eflag,vflag); + } + timer->stamp(TIME_BOND); } - if (force->kspace) { + if(force->kspace) { force->kspace->setup(); - if (kspace_compute_flag) force->kspace->compute(eflag,vflag); - else force->kspace->compute_dummy(eflag,vflag); + if (kspace_compute_flag) { + 
atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read); + atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify); + force->kspace->compute(eflag,vflag); + timer->stamp(TIME_KSPACE); + } else force->kspace->compute_dummy(eflag,vflag); } if (force->newton) comm->reverse_comm(); @@ -291,31 +351,47 @@ void VerletKokkos::run(int n) timer->stamp(); if (pair_compute_flag) { + atomKK->sync(force->pair->execution_space,force->pair->datamask_read); + atomKK->modified(force->pair->execution_space,force->pair->datamask_modify); force->pair->compute(eflag,vflag); timer->stamp(TIME_PAIR); } if (atomKK->molecular) { - if (force->bond) force->bond->compute(eflag,vflag); - if (force->angle) force->angle->compute(eflag,vflag); - if (force->dihedral) force->dihedral->compute(eflag,vflag); - if (force->improper) force->improper->compute(eflag,vflag); + if (force->bond) { + atomKK->sync(force->bond->execution_space,force->bond->datamask_read); + atomKK->modified(force->bond->execution_space,force->bond->datamask_modify); + force->bond->compute(eflag,vflag); + } + if (force->angle) { + atomKK->sync(force->angle->execution_space,force->angle->datamask_read); + atomKK->modified(force->angle->execution_space,force->angle->datamask_modify); + force->angle->compute(eflag,vflag); + } + if (force->dihedral) { + atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read); + atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify); + force->dihedral->compute(eflag,vflag); + } + if (force->improper) { + atomKK->sync(force->improper->execution_space,force->improper->datamask_read); + atomKK->modified(force->improper->execution_space,force->improper->datamask_modify); + force->improper->compute(eflag,vflag); + } timer->stamp(TIME_BOND); } if (kspace_compute_flag) { + atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read); + 
atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify); force->kspace->compute(eflag,vflag); timer->stamp(TIME_KSPACE); } // reverse communication of forces - if (force->newton) { - atomKK->sync(Host,F_MASK); - comm->reverse_comm(); - atomKK->modified(Host,F_MASK); - timer->stamp(TIME_COMM); - } + if (force->newton) comm->reverse_comm(); + timer->stamp(TIME_COMM); // force modifications, final time integration, diagnostics -- GitLab