diff --git a/doc/src/pair_yukawa.txt b/doc/src/pair_yukawa.txt index 61d6bde6a92ab34ea2a42527f59a3227ab27b163..e7c063ded9f41227210e17970dc9fb7cc59ed757 100644 --- a/doc/src/pair_yukawa.txt +++ b/doc/src/pair_yukawa.txt @@ -9,6 +9,7 @@ pair_style yukawa command :h3 pair_style yukawa/gpu command :h3 pair_style yukawa/omp command :h3 +pair_style yukawa/kk command :h3 [Syntax:] diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index df5fc3e5f1a9d146a94f37435842d6076f4edff2..a382295be3556580921b86b6184c7fa1a6cd035a 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -229,6 +229,8 @@ action pair_tersoff_mod_kokkos.cpp pair_tersoff_mod.cpp action pair_tersoff_mod_kokkos.h pair_tersoff_mod.h action pair_tersoff_zbl_kokkos.cpp pair_tersoff_zbl.cpp action pair_tersoff_zbl_kokkos.h pair_tersoff_zbl.h +action pair_yukawa_kokkos.cpp +action pair_yukawa_kokkos.h action pppm_kokkos.cpp pppm.cpp action pppm_kokkos.h pppm.h action rand_pool_wrap_kokkos.cpp diff --git a/src/KOKKOS/pair_yukawa_kokkos.cpp b/src/KOKKOS/pair_yukawa_kokkos.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6d675ed2b892f21f729493a527cb102161a55faa --- /dev/null +++ b/src/KOKKOS/pair_yukawa_kokkos.cpp @@ -0,0 +1,301 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Stefan Paquay (Brandeis University) +------------------------------------------------------------------------- */ +#include <math.h> +#include <stdlib.h> +#include "pair_yukawa_kokkos.h" +#include "kokkos.h" +#include "atom_kokkos.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "update.h" +#include "integrate.h" +#include "respa.h" +#include "math_const.h" +#include "memory.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define KOKKOS_CUDA_MAX_THREADS 256 +#define KOKKOS_CUDA_MIN_BLOCKS 8 + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +PairYukawaKokkos<DeviceType>::PairYukawaKokkos(LAMMPS *lmp) : PairYukawa(lmp) +{ + respa_enable = 0; + + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice<DeviceType>::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + cutsq = NULL; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +PairYukawaKokkos<DeviceType>::~PairYukawaKokkos() +{ + if (allocated) { + memory->destroy_kokkos(k_eatom,eatom); + memory->destroy_kokkos(k_vatom,vatom); + k_cutsq = DAT::tdual_ffloat_2d(); + memory->sfree(cutsq); + eatom = NULL; + vatom = NULL; + cutsq = NULL; + } +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void PairYukawaKokkos<DeviceType>::cleanup_copy() { + // WHY needed: this prevents parent copy from deallocating any arrays + allocated = 0; + cutsq = NULL; + eatom = NULL; + vatom = NULL; +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template<class DeviceType> +void PairYukawaKokkos<DeviceType>::allocate() +{ + PairYukawa::allocate(); + + int n = atom->ntypes; + memory->destroy(cutsq); + memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + d_cutsq = k_cutsq.template view<DeviceType>(); + k_params = Kokkos::DualView<params_yukawa**, + Kokkos::LayoutRight,DeviceType>( + "PairYukawa::params",n+1,n+1); + + params = k_params.template view<DeviceType>(); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template<class DeviceType> +void PairYukawaKokkos<DeviceType>::init_style() +{ + PairYukawa::init_style(); + + // error if rRESPA with inner levels + + if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) { + int respa = 0; + if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; + if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + if (respa) + error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle"); + } + + // irequest = neigh request made by parent class + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value && + !Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value; + + if (neighflag == FULL) { + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + } else if (neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 1; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with yukawa/kk"); + } +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ +// Rewrite this. +template<class DeviceType> +double PairYukawaKokkos<DeviceType>::init_one(int i, int j) +{ + double cutone = PairYukawa::init_one(i,j); + + k_params.h_view(i,j).a = a[i][j]; + k_params.h_view(i,j).offset = offset[i][j]; + k_params.h_view(i,j).cutsq = cutone*cutone; + k_params.h_view(j,i) = k_params.h_view(i,j); + + if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) { + m_params[i][j] = m_params[j][i] = k_params.h_view(i,j); + m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone; + } + + k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone; + k_cutsq.template modify<LMPHostType>(); + k_params.template modify<LMPHostType>(); + + return cutone; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void PairYukawaKokkos<DeviceType>::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + if (eflag || vflag) ev_setup(eflag,vflag,0); + else evflag = vflag_fdotr = 0; + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memory->destroy_kokkos(k_eatom,eatom); + memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.view<DeviceType>(); + } + if (vflag_atom) { + memory->destroy_kokkos(k_vatom,vatom); + memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + d_vatom = k_vatom.view<DeviceType>(); + } + + atomKK->sync(execution_space,datamask_read); + k_cutsq.template sync<DeviceType>(); + k_params.template sync<DeviceType>(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = atomKK->k_x.view<DeviceType>(); + c_x = atomKK->k_x.view<DeviceType>(); + f = atomKK->k_f.view<DeviceType>(); + type = atomKK->k_type.view<DeviceType>(); + tag = atomKK->k_tag.view<DeviceType>(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + newton_pair = force->newton_pair; + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; + + // loop over neighbors of my atoms + + EV_FLOAT ev = pair_compute<PairYukawaKokkos<DeviceType>,void >( + this,(NeighListKokkos<DeviceType>*)list); + + if (eflag_global) eng_vdwl += ev.evdwl; + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + if (eflag_atom) { + k_eatom.template modify<DeviceType>(); + k_eatom.template sync<LMPHostType>(); + } + + if (vflag_atom) { + k_vatom.template modify<DeviceType>(); + k_vatom.template sync<LMPHostType>(); + } +} + + + +template<class DeviceType> +template<bool STACKPARAMS, class Specialisation> +KOKKOS_INLINE_FUNCTION +F_FLOAT PairYukawaKokkos<DeviceType>:: +compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const { + (void) i; + (void) j; + const F_FLOAT rr = sqrt(rsq); + // Fetch the params either off the stack or from some mapped memory? + const F_FLOAT aa = STACKPARAMS ? m_params[itype][jtype].a + : params(itype,jtype).a; + + // U = a * exp(-kappa*r) / r + // f = (kappa * a * exp(-kappa*r) / r + a*exp(-kappa*r)/r^2)*grad(r) + // = (kappa + 1/r) * (a * exp(-kappa*r) / r) + // f/r = (kappa + 1/r) * (a * exp(-kappa*r) / r^2) + const F_FLOAT rinv = 1.0 / rr; + const F_FLOAT rinv2 = rinv*rinv; + const F_FLOAT screening = exp(-kappa*rr); + const F_FLOAT forceyukawa = aa * screening * (kappa + rinv); + const F_FLOAT fpair = forceyukawa * rinv2; + + return fpair; +} + +template<class DeviceType> +template<bool STACKPARAMS, class Specialisation> +KOKKOS_INLINE_FUNCTION +F_FLOAT PairYukawaKokkos<DeviceType>:: +compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const { + (void) i; + (void) j; + const F_FLOAT rr = sqrt(rsq); + const F_FLOAT aa = STACKPARAMS ? m_params[itype][jtype].a + : params(itype,jtype).a; + const F_FLOAT offset = STACKPARAMS ? m_params[itype][jtype].offset + : params(itype,jtype).offset; + + // U = a * exp(-kappa*r) / r + // f = (kappa * a * exp(-kappa*r) / r + a*exp(-kappa*r)/r^2)*grad(r) + // = (kappa + 1/r) * (a * exp(-kappa*r) / r) + // f/r = (kappa + 1/r) * (a * exp(-kappa*r) / r^2) + const F_FLOAT rinv = 1.0 / rr; + const F_FLOAT screening = exp(-kappa*rr); + + return aa * screening * rinv - offset; +} + + +namespace LAMMPS_NS { +template class PairYukawaKokkos<LMPDeviceType>; +#ifdef KOKKOS_HAVE_CUDA +template class PairYukawaKokkos<LMPHostType>; +#endif +} diff --git a/src/KOKKOS/pair_yukawa_kokkos.h b/src/KOKKOS/pair_yukawa_kokkos.h new file mode 100644 index 0000000000000000000000000000000000000000..a4c8cf05b781ddbfd7ad65c53eb900aba7e4a9a0 --- /dev/null +++ b/src/KOKKOS/pair_yukawa_kokkos.h @@ -0,0 +1,146 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(yukawa/kk,PairYukawaKokkos<LMPDeviceType>) +PairStyle(yukawa/kk/device,PairYukawaKokkos<LMPDeviceType>) +PairStyle(yukawa/kk/host,PairYukawaKokkos<LMPHostType>) + +#else + +#ifndef LMP_PAIR_YUKAWA_KOKKOS_H +#define LMP_PAIR_YUKAWA_KOKKOS_H + +#include "pair_kokkos.h" +#include "pair_yukawa.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template<class DeviceType> +class PairYukawaKokkos : public PairYukawa { + public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + enum {COUL_FLAG=0}; + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + + PairYukawaKokkos(class LAMMPS *); + virtual ~PairYukawaKokkos(); + + void compute(int, int); + void init_style(); + double init_one(int,int); + + struct params_yukawa { + KOKKOS_INLINE_FUNCTION + params_yukawa(){ cutsq=0, a = 0; offset = 0; } + KOKKOS_INLINE_FUNCTION + params_yukawa(int i){ cutsq=0, a = 0; offset = 0; } + F_FLOAT cutsq, a, offset; + }; + + + protected: + void cleanup_copy(); + + template<bool STACKPARAMS, class Specialisation> + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template<bool STACKPARAMS, class Specialisation> + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template<bool STACKPARAMS, class Specialisation> + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const + { + return 0; + } + + + Kokkos::DualView<params_yukawa**,Kokkos::LayoutRight,DeviceType> k_params; + typename Kokkos::DualView<params_yukawa**,Kokkos::LayoutRight,DeviceType>::t_dev_const_um params; + params_yukawa m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + typename AT::t_x_array_randomread x; + typename AT::t_x_array c_x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + typename AT::t_efloat_1d d_eatom; + typename AT::t_virial_array d_vatom; + typename AT::t_tagint_1d tag; + + int newton_pair; + double special_lj[4]; + + typename AT::tdual_ffloat_2d k_cutsq; + typename AT::t_ffloat_2d d_cutsq; + + + int neighflag; + int nlocal,nall,eflag,vflag; + + void allocate(); + friend class PairComputeFunctor<PairYukawaKokkos,FULL,true>; + friend class PairComputeFunctor<PairYukawaKokkos,HALF,true>; + friend class PairComputeFunctor<PairYukawaKokkos,HALFTHREAD,true>; + friend class PairComputeFunctor<PairYukawaKokkos,N2,true>; + friend class PairComputeFunctor<PairYukawaKokkos,FULL,false>; + friend class PairComputeFunctor<PairYukawaKokkos,HALF,false>; + friend class PairComputeFunctor<PairYukawaKokkos,HALFTHREAD,false>; + friend class PairComputeFunctor<PairYukawaKokkos,N2,false>; + friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,FULL,void>( + PairYukawaKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,HALF,void>( + PairYukawaKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,HALFTHREAD,void>( + PairYukawaKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,N2,void>( + PairYukawaKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute<PairYukawaKokkos,void>( + PairYukawaKokkos*,NeighListKokkos<DeviceType>*); + friend void pair_virial_fdotr_compute<PairYukawaKokkos>(PairYukawaKokkos*); + +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. + +E: Cannot use chosen neighbor list style with yukawa/kk + +That style is not supported by Kokkos. + +*/ diff --git a/src/pair_yukawa.cpp b/src/pair_yukawa.cpp index 2ba6633d9e21719f8c06beccf263c88ac438110e..9be9237734343588dda84aceb4d650c067c8f525 100644 --- a/src/pair_yukawa.cpp +++ b/src/pair_yukawa.cpp @@ -139,7 +139,6 @@ void PairYukawa::allocate() setflag[i][j] = 0; memory->create(cutsq,n+1,n+1,"pair:cutsq"); - memory->create(rad,n+1,"pair:rad"); memory->create(cut,n+1,n+1,"pair:cut"); memory->create(a,n+1,n+1,"pair:a"); diff --git a/src/pair_yukawa.h b/src/pair_yukawa.h index 3859d163af5dd7a6843c7b5a178a53722691abf6..3222019a0ae94d4b4a9ccb4ca95fdcf3519ef59b 100644 --- a/src/pair_yukawa.h +++ b/src/pair_yukawa.h @@ -46,7 +46,7 @@ class PairYukawa : public Pair { double *rad; double **cut,**a,**offset; - void allocate(); + virtual void allocate(); }; }