From 6676724eeebc17c251cdaa1050315f604bc255a3 Mon Sep 17 00:00:00 2001 From: sjplimp <sjplimp@f3b2605a-c512-4ea7-a41b-209d697bcdaa> Date: Fri, 8 Feb 2013 16:57:12 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@9404 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/USER-OMP/Install.sh | 8 +- src/USER-OMP/Package.sh | 34 +- src/USER-OMP/angle_cosine_periodic_omp.cpp | 6 +- src/USER-OMP/angle_fourier_omp.cpp | 171 ++++++ src/USER-OMP/angle_fourier_omp.h | 46 ++ src/USER-OMP/angle_fourier_simple_omp.cpp | 188 +++++++ src/USER-OMP/angle_fourier_simple_omp.h | 46 ++ src/USER-OMP/angle_quartic_omp.cpp | 180 ++++++ src/USER-OMP/angle_quartic_omp.h | 46 ++ src/USER-OMP/dihedral_fourier_omp.cpp | 274 +++++++++ src/USER-OMP/dihedral_fourier_omp.h | 46 ++ src/USER-OMP/dihedral_nharmonic_omp.cpp | 273 +++++++++ src/USER-OMP/dihedral_nharmonic_omp.h | 46 ++ src/USER-OMP/dihedral_quadratic_omp.cpp | 286 ++++++++++ src/USER-OMP/dihedral_quadratic_omp.h | 46 ++ src/USER-OMP/dihedral_table_omp.cpp | 106 ++-- src/USER-OMP/improper_fourier_omp.cpp | 282 ++++++++++ src/USER-OMP/improper_fourier_omp.h | 53 ++ src/USER-OMP/improper_ring_omp.cpp | 6 +- src/USER-OMP/msm_cg_omp.cpp | 522 ++++++++++++++++++ src/USER-OMP/msm_cg_omp.h | 141 +++++ src/USER-OMP/pair_airebo_omp.cpp | 6 +- src/USER-OMP/pair_beck_omp.cpp | 6 +- src/USER-OMP/pair_brownian_omp.cpp | 8 +- src/USER-OMP/pair_brownian_poly_omp.cpp | 22 +- src/USER-OMP/pair_colloid_omp.cpp | 10 +- src/USER-OMP/pair_dipole_sf_omp.cpp | 5 +- src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp | 35 +- .../pair_hbond_dreiding_morse_omp.cpp | 35 +- src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp | 229 ++++++++ src/USER-OMP/pair_lj_sdk_coul_msm_omp.h | 49 ++ src/USER-OMP/thr_omp.cpp | 7 + 32 files changed, 3102 insertions(+), 116 deletions(-) create mode 100644 src/USER-OMP/angle_fourier_omp.cpp create mode 100644 src/USER-OMP/angle_fourier_omp.h create mode 100644 src/USER-OMP/angle_fourier_simple_omp.cpp create mode 100644 src/USER-OMP/angle_fourier_simple_omp.h create mode 100644 src/USER-OMP/angle_quartic_omp.cpp create mode 100644 src/USER-OMP/angle_quartic_omp.h create mode 100644 src/USER-OMP/dihedral_fourier_omp.cpp create mode 100644 src/USER-OMP/dihedral_fourier_omp.h create mode 100644 src/USER-OMP/dihedral_nharmonic_omp.cpp create mode 100644 src/USER-OMP/dihedral_nharmonic_omp.h create mode 100644 src/USER-OMP/dihedral_quadratic_omp.cpp create mode 100644 src/USER-OMP/dihedral_quadratic_omp.h create mode 100644 src/USER-OMP/improper_fourier_omp.cpp create mode 100644 src/USER-OMP/improper_fourier_omp.h create mode 100644 src/USER-OMP/msm_cg_omp.cpp create mode 100644 src/USER-OMP/msm_cg_omp.h create mode 100644 src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp create mode 100644 src/USER-OMP/pair_lj_sdk_coul_msm_omp.h diff --git a/src/USER-OMP/Install.sh b/src/USER-OMP/Install.sh index 8e14d0dc45..eb30cd4621 100644 --- a/src/USER-OMP/Install.sh +++ b/src/USER-OMP/Install.sh @@ -1,11 +1,10 @@ # Install/unInstall package files in LAMMPS + +# step 1: process all *_omp.cpp and *_omp.h files. # do not install child files if parent does not exist for file in *_omp.cpp *_omp.h ; do - # let us see if the "rain man" can count the toothpicks... - ofile=`echo $file | sed \ - -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.h,\\\\1.h, \ - -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.cpp,\\\\1.cpp,` + ofile=`echo $file | sed -e 's,\(.*\)_omp\.\(h\|cpp\),\1.\2,' if (test $1 = 1) then if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") then : # always install those files. @@ -20,6 +19,7 @@ for file in *_omp.cpp *_omp.h ; do fi done +# step 2: handle cases and tasks not handled in step 1. if (test $1 = 1) then if (test -e ../Makefile.package) then diff --git a/src/USER-OMP/Package.sh b/src/USER-OMP/Package.sh index afa02fd4fc..dee167cffd 100644 --- a/src/USER-OMP/Package.sh +++ b/src/USER-OMP/Package.sh @@ -1,23 +1,16 @@ # Update package files in LAMMPS -# copy package file to src if it doesn't exists or is different -# do not copy OpenMP style files, if a non-OpenMP version does -# not exist. Do remove OpenMP style files that have no matching -# non-OpenMP version installed, e.g. after a package has been -# removed -for file in *_omp.cpp *_omp.h thr_data.h thr_data.cpp; do - # let us see if the "rain man" can count the toothpicks... - ofile=`echo $file | sed \ - -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,` - if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") \ - || (test $file = "thr_data.h") || (test $file = "thr_data.cpp") then - if (test ! -e ../$file) then - echo " creating src/$file" - cp $file .. - elif ! cmp -s $file ../$file ; then - echo " updating src/$file" - cp $file .. - fi - elif (test ! -e ../$ofile) then +# Copy package file to src if it doesn't exists or is different. +# But only copy the file, if a non-OpenMP version exists and +# remove OpenMP versions that have no matching serial file +# installed, e.g. after a package has been removed. +for file in *_omp.cpp *_omp.h ; do + # these are special cases and handled below + if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") then + continue + fi + # derive name of non-OpenMP version + ofile=`echo $file | sed -e 's,\(.*\)_omp\.\(h\|cpp\),\1.\2,'` + if (test ! -e ../$ofile) then if (test -e ../$file) then echo " removing src/$file" rm -f ../$file @@ -33,7 +26,8 @@ for file in *_omp.cpp *_omp.h thr_data.h thr_data.cpp; do fi done -for file in thr_data.h thr_data.cpp; do +# special case for files not covered by the automatic script above +for file in thr_data.h thr_data.cpp thr_omp.h thr_omp.cpp; do if (test ! -e ../$file) then echo " creating src/$file" cp $file .. diff --git a/src/USER-OMP/angle_cosine_periodic_omp.cpp b/src/USER-OMP/angle_cosine_periodic_omp.cpp index 6c5932e70c..e43fabafdd 100644 --- a/src/USER-OMP/angle_cosine_periodic_omp.cpp +++ b/src/USER-OMP/angle_cosine_periodic_omp.cpp @@ -23,12 +23,14 @@ #include "domain.h" #include "math_const.h" +#include "math_special.h" #include <math.h> #include "suffix.h" using namespace LAMMPS_NS; using namespace MathConst; +using namespace MathSpecial; #define SMALL 0.001 @@ -161,8 +163,8 @@ void AngleCosinePeriodicOMP::eval(int nfrom, int nto, ThrData * const thr) un_2 = un_1; un_1 = un; } - tn = b_factor*pow(-1.0,(double)m)*tn; - un = b_factor*pow(-1.0,(double)m)*m*un; + tn = b_factor*powsign(m)*tn; + un = b_factor*powsign(m)*m*un; if (EFLAG) eangle = 2*k[type]*(1.0 - tn); diff --git a/src/USER-OMP/angle_fourier_omp.cpp b/src/USER-OMP/angle_fourier_omp.cpp new file mode 100644 index 0000000000..6765129947 --- /dev/null +++ b/src/USER-OMP/angle_fourier_omp.cpp @@ -0,0 +1,171 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "angle_fourier_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include <math.h> + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +AngleFourierOMP::AngleFourierOMP(class LAMMPS *lmp) + : AngleFourier(lmp), ThrOMP(lmp,THR_ANGLE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void AngleFourierOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template <int EVFLAG, int EFLAG, int NEWTON_BOND> +void AngleFourierOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3]; + double term; + double rsq1,rsq2,r1,r2,c,c2,a,a11,a12,a22; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // force & energy + + c2 = 2.0*c*c-1.0; + term = k[type]*(C0[type]+C1[type]*c+C2[type]*c2); + + if (EFLAG) eangle = term; + + a = k[type]*(C1[type]+4.0*C2[type]*c); + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_fourier_omp.h b/src/USER-OMP/angle_fourier_omp.h new file mode 100644 index 0000000000..0185d09526 --- /dev/null +++ b/src/USER-OMP/angle_fourier_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(fourier/omp,AngleFourierOMP) + +#else + +#ifndef LMP_ANGLE_FOURIER_OMP_H +#define LMP_ANGLE_FOURIER_OMP_H + +#include "angle_fourier.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleFourierOMP : public AngleFourier, public ThrOMP { + + public: + AngleFourierOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_BOND> + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_fourier_simple_omp.cpp b/src/USER-OMP/angle_fourier_simple_omp.cpp new file mode 100644 index 0000000000..d344028b9c --- /dev/null +++ b/src/USER-OMP/angle_fourier_simple_omp.cpp @@ -0,0 +1,188 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "angle_fourier_simple_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include <math.h> + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +AngleFourierSimpleOMP::AngleFourierSimpleOMP(class LAMMPS *lmp) + : AngleFourierSimple(lmp), ThrOMP(lmp,THR_ANGLE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void AngleFourierSimpleOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template <int EVFLAG, int EFLAG, int NEWTON_BOND> +void AngleFourierSimpleOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3]; + double term,sgn; + double rsq1,rsq2,r1,r2,c,cn,th,nth,a,a11,a12,a22; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // force & energy + + th = acos(c); + nth = N[type]*acos(c); + cn = cos(nth); + term = k[type]*(1.0+C[type]*cn); + + if (EFLAG) eangle = term; + + // handle sin(n th)/sin(th) singulatiries + + if ( fabs(c)-1.0 > 0.0001 ) { + a = k[type]*C[type]*N[type]*sin(nth)/sin(th); + } else { + if ( c >= 0.0 ) { + term = 1.0 - c; + sgn = 1.0; + } else { + term = 1.0 + c; + sgn = ( fmodf((float)(N[type]),2.0) == 0.0f )?-1.0:1.0; + } + a = N[type]+N[type]*(1.0-N[type]*N[type])*term/3.0; + a = k[type]*C[type]*N[type]*(double)(sgn)*a; + } + + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_fourier_simple_omp.h b/src/USER-OMP/angle_fourier_simple_omp.h new file mode 100644 index 0000000000..4ff0ce154f --- /dev/null +++ b/src/USER-OMP/angle_fourier_simple_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(fourier/simple/omp,AngleFourierSimpleOMP) + +#else + +#ifndef LMP_ANGLE_FOURIER_SIMPLE_OMP_H +#define LMP_ANGLE_FOURIER_SIMPLE_OMP_H + +#include "angle_fourier_simple.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleFourierSimpleOMP : public AngleFourierSimple, public ThrOMP { + + public: + AngleFourierSimpleOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_BOND> + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_quartic_omp.cpp b/src/USER-OMP/angle_quartic_omp.cpp new file mode 100644 index 0000000000..7306cc2f7d --- /dev/null +++ b/src/USER-OMP/angle_quartic_omp.cpp @@ -0,0 +1,180 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "angle_quartic_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include <math.h> + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +AngleQuarticOMP::AngleQuarticOMP(class LAMMPS *lmp) + : AngleQuartic(lmp), ThrOMP(lmp,THR_ANGLE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void AngleQuarticOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template <int EVFLAG, int EFLAG, int NEWTON_BOND> +void AngleQuarticOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3]; + double dtheta,dtheta2,dtheta3,dtheta4,tk; + double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + s = 1.0/s; + + // force & energy + + dtheta = acos(c) - theta0[type]; + dtheta2 = dtheta * dtheta; + dtheta3 = dtheta2 * dtheta; + tk = 2.0 * k2[type] * dtheta + 3.0 * k3[type] * dtheta2 + 4.0 * k4[type] * dtheta3; + + if (EFLAG) { + dtheta4 = dtheta3 * dtheta; + eangle = k2[type] * dtheta2 + k3[type] * dtheta3 + k4[type] * dtheta4; + } + + a = -2.0 * tk * s; + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_quartic_omp.h b/src/USER-OMP/angle_quartic_omp.h new file mode 100644 index 0000000000..72f8ffd821 --- /dev/null +++ b/src/USER-OMP/angle_quartic_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(quartic/omp,AngleQuarticOMP) + +#else + +#ifndef LMP_ANGLE_QUARTIC_OMP_H +#define LMP_ANGLE_QUARTIC_OMP_H + +#include "angle_quartic.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleQuarticOMP : public AngleQuartic, public ThrOMP { + + public: + AngleQuarticOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_BOND> + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/dihedral_fourier_omp.cpp b/src/USER-OMP/dihedral_fourier_omp.cpp new file mode 100644 index 0000000000..6b14367271 --- /dev/null +++ b/src/USER-OMP/dihedral_fourier_omp.cpp @@ -0,0 +1,274 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "dihedral_fourier_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +#include "math_const.h" +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define TOLERANCE 0.05 + +/* ---------------------------------------------------------------------- */ + +DihedralFourierOMP::DihedralFourierOMP(class LAMMPS *lmp) + : DihedralFourier(lmp), ThrOMP(lmp,THR_DIHEDRAL) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void DihedralFourierOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->ndihedrallist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template <int EVFLAG, int EFLAG, int NEWTON_BOND> +void DihedralFourierOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,i4,i,j,m,n,type; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; + double edihedral,f1[3],f2[3],f3[3],f4[3]; + double ax,ay,az,bx,by,bz,rasq,rbsq,rgsq,rg,rginv,ra2inv,rb2inv,rabinv; + double df,df1_,ddf1_,fg,hg,fga,hgb,gaa,gbb; + double dtfx,dtfy,dtfz,dtgx,dtgy,dtgz,dthx,dthy,dthz; + double c,s,p_,sx2,sy2,sz2; + + edihedral = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = dihedrallist[n][0]; + i2 = dihedrallist[n][1]; + i3 = dihedrallist[n][2]; + i4 = dihedrallist[n][3]; + type = dihedrallist[n][4]; + + // 1st bond + + vb1x = x[i1][0] - x[i2][0]; + vb1y = x[i1][1] - x[i2][1]; + vb1z = x[i1][2] - x[i2][2]; + + // 2nd bond + + vb2x = x[i3][0] - x[i2][0]; + vb2y = x[i3][1] - x[i2][1]; + vb2z = x[i3][2] - x[i2][2]; + + vb2xm = -vb2x; + vb2ym = -vb2y; + vb2zm = -vb2z; + + // 3rd bond + + vb3x = x[i4][0] - x[i3][0]; + vb3y = x[i4][1] - x[i3][1]; + vb3z = x[i4][2] - x[i3][2]; + + ax = vb1y*vb2zm - vb1z*vb2ym; + ay = vb1z*vb2xm - vb1x*vb2zm; + az = vb1x*vb2ym - vb1y*vb2xm; + bx = vb3y*vb2zm - vb3z*vb2ym; + by = vb3z*vb2xm - vb3x*vb2zm; + bz = vb3x*vb2ym - vb3y*vb2xm; + + rasq = ax*ax + ay*ay + az*az; + rbsq = bx*bx + by*by + bz*bz; + rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm; + rg = sqrt(rgsq); + + rginv = ra2inv = rb2inv = 0.0; + if (rg > 0) rginv = 1.0/rg; + if (rasq > 0) ra2inv = 1.0/rasq; + if (rbsq > 0) rb2inv = 1.0/rbsq; + rabinv = sqrt(ra2inv*rb2inv); + + c = (ax*bx + ay*by + az*bz)*rabinv; + s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z); + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // force and energy + // p = sum(i=1,nterms) k_i*(1+cos(n_i*phi-d_i) + // dp = dp / dphi + edihedral = 0.0; + df = 0.0; + for (j=0; j<nterms[type]; j++) { + m = multiplicity[type][j]; + p_ = 1.0; + df1_ = 0.0; + + for (i = 0; i < m; i++) { + ddf1_ = p_*c - df1_*s; + df1_ = p_*s + df1_*c; + p_ = ddf1_; + } + + p_ = p_*cos_shift[type][j] + df1_*sin_shift[type][j]; + df1_ = df1_*cos_shift[type][j] - ddf1_*sin_shift[type][j]; + df1_ *= -m; + p_ += 1.0; + + if (m == 0) { + p_ = 1.0 + cos_shift[type][j]; + df1_ = 0.0; + } + + if (EFLAG) edihedral += k[type][j] * p_; + + df += (-k[type][j] * df1_); + } + + fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm; + hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm; + fga = fg*ra2inv*rginv; + hgb = hg*rb2inv*rginv; + gaa = -ra2inv*rg; + gbb = rb2inv*rg; + + dtfx = gaa*ax; + dtfy = gaa*ay; + dtfz = gaa*az; + dtgx = fga*ax - hgb*bx; + dtgy = fga*ay - hgb*by; + dtgz = fga*az - hgb*bz; + dthx = gbb*bx; + dthy = gbb*by; + dthz = gbb*bz; + + sx2 = df*dtgx; + sy2 = df*dtgy; + sz2 = df*dtgz; + + f1[0] = df*dtfx; + f1[1] = df*dtfy; + f1[2] = df*dtfz; + + f2[0] = sx2 - f1[0]; + f2[1] = sy2 - f1[1]; + f2[2] = sz2 - f1[2]; + + f4[0] = df*dthx; + f4[1] = df*dthy; + f4[2] = df*dthz; + + f3[0] = -sx2 - f4[0]; + f3[1] = -sy2 - f4[1]; + f3[2] = -sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f2[0]; + f[i2][1] += f2[1]; + f[i2][2] += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + } +} diff --git a/src/USER-OMP/dihedral_fourier_omp.h b/src/USER-OMP/dihedral_fourier_omp.h new file mode 100644 index 0000000000..1ae49e0167 --- /dev/null +++ b/src/USER-OMP/dihedral_fourier_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef DIHEDRAL_CLASS + +DihedralStyle(fourier/omp,DihedralFourierOMP) + +#else + +#ifndef LMP_DIHEDRAL_FOURIER_OMP_H +#define LMP_DIHEDRAL_FOURIER_OMP_H + +#include "dihedral_fourier.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class DihedralFourierOMP : public DihedralFourier, public ThrOMP { + + public: + DihedralFourierOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_BOND> + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/dihedral_nharmonic_omp.cpp b/src/USER-OMP/dihedral_nharmonic_omp.cpp new file mode 100644 index 0000000000..8adf19a899 --- /dev/null +++ b/src/USER-OMP/dihedral_nharmonic_omp.cpp @@ -0,0 +1,273 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "dihedral_nharmonic_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +DihedralNHarmonicOMP::DihedralNHarmonicOMP(class LAMMPS *lmp) + : DihedralNHarmonic(lmp), ThrOMP(lmp,THR_DIHEDRAL) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void DihedralNHarmonicOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->ndihedrallist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template <int EVFLAG, int EFLAG, int NEWTON_BOND> +void DihedralNHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,i4,n,type; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; + double edihedral,f1[3],f2[3],f3[3],f4[3]; + double sb1,sb2,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2; + double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2; + double c2mag,sc1,sc2,s1,s12,c,p,pd,a11,a22; + double a33,a12,a13,a23,sx2,sy2,sz2; + double s2,sin2; + + edihedral = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = dihedrallist[n][0]; + i2 = dihedrallist[n][1]; + i3 = dihedrallist[n][2]; + i4 = dihedrallist[n][3]; + type = dihedrallist[n][4]; + + // 1st bond + + vb1x = x[i1][0] - x[i2][0]; + vb1y = x[i1][1] - x[i2][1]; + vb1z = x[i1][2] - x[i2][2]; + + // 2nd bond + + vb2x = x[i3][0] - x[i2][0]; + vb2y = x[i3][1] - x[i2][1]; + vb2z = x[i3][2] - x[i2][2]; + + vb2xm = -vb2x; + vb2ym = -vb2y; + vb2zm = -vb2z; + + // 3rd bond + + vb3x = x[i4][0] - x[i3][0]; + vb3y = x[i4][1] - x[i3][1]; + vb3z = x[i4][2] - x[i3][2]; + + // c0 calculation + + sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z); + sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z); + sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z); + + rb1 = sqrt(sb1); + rb3 = sqrt(sb3); + + c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3; + + // 1st and 2nd angle + + b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z; + b1mag = sqrt(b1mag2); + b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z; + b2mag = sqrt(b2mag2); + b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z; + b3mag = sqrt(b3mag2); + + ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z; + r12c1 = 1.0 / (b1mag*b2mag); + c1mag = ctmp * r12c1; + + ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z; + r12c2 = 1.0 / (b2mag*b3mag); + c2mag = ctmp * r12c2; + + // cos and sin of 2 angles and final c + + sin2 = MAX(1.0 - c1mag*c1mag,0.0); + sc1 = sqrt(sin2); + if (sc1 < SMALL) sc1 = SMALL; + sc1 = 1.0/sc1; + + sin2 = MAX(1.0 - c2mag*c2mag,0.0); + sc2 = sqrt(sin2); + if (sc2 < SMALL) sc2 = SMALL; + sc2 = 1.0/sc2; + + s1 = sc1 * sc1; + s2 = sc2 * sc2; + s12 = sc1 * sc2; + c = (c0 + c1mag*c2mag) * s12; + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // force & energy + // p = sum (i=1,n) a_i * c**(i-1) + // pd = dp/dc + p = a[type][0]; + pd = a[type][1]; + for (int i = 1; i < nterms[type]-1; i++) { + p += c * a[type][i]; + pd += c * static_cast<double>(i+1) * a[type][i+1]; + c *= c; + } + p += c * a[type][nterms[type]-1]; + + if (EFLAG) edihedral = p; + + c = c * pd; + s12 = s12 * pd; + a11 = c*sb1*s1; + a22 = -sb2 * (2.0*c0*s12 - c*(s1+s2)); + a33 = c*sb3*s2; + a12 = -r12c1*(c1mag*c*s1 + c2mag*s12); + a13 = -rb1*rb3*s12; + a23 = r12c2*(c2mag*c*s2 + c1mag*s12); + + sx2 = a12*vb1x + a22*vb2x + a23*vb3x; + sy2 = a12*vb1y + a22*vb2y + a23*vb3y; + sz2 = a12*vb1z + a22*vb2z + a23*vb3z; + + f1[0] = a11*vb1x + a12*vb2x + a13*vb3x; + f1[1] = a11*vb1y + a12*vb2y + a13*vb3y; + f1[2] = a11*vb1z + a12*vb2z + a13*vb3z; + + f2[0] = -sx2 - f1[0]; + f2[1] = -sy2 - f1[1]; + f2[2] = -sz2 - f1[2]; + + f4[0] = a13*vb1x + a23*vb2x + a33*vb3x; + f4[1] = a13*vb1y + a23*vb2y + a33*vb3y; + f4[2] = a13*vb1z + a23*vb2z + a33*vb3z; + + f3[0] = sx2 - f4[0]; + f3[1] = sy2 - f4[1]; + f3[2] = sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f2[0]; + f[i2][1] += f2[1]; + f[i2][2] += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + } +} diff --git a/src/USER-OMP/dihedral_nharmonic_omp.h b/src/USER-OMP/dihedral_nharmonic_omp.h new file mode 100644 index 0000000000..7d9daeab97 --- /dev/null +++ b/src/USER-OMP/dihedral_nharmonic_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef DIHEDRAL_CLASS + +DihedralStyle(nharmonic/omp,DihedralNHarmonicOMP) + +#else + +#ifndef LMP_DIHEDRAL_NHARMONIC_OMP_H +#define LMP_DIHEDRAL_NHARMONIC_OMP_H + +#include "dihedral_nharmonic.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class DihedralNHarmonicOMP : public DihedralNHarmonic, public ThrOMP { + + public: + DihedralNHarmonicOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_BOND> + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/dihedral_quadratic_omp.cpp b/src/USER-OMP/dihedral_quadratic_omp.cpp new file mode 100644 index 0000000000..3e5fc243b9 --- /dev/null +++ b/src/USER-OMP/dihedral_quadratic_omp.cpp @@ -0,0 +1,286 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "dihedral_quadratic_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 +#define SMALLER 0.00001 + +/* ---------------------------------------------------------------------- */ + +DihedralQuadraticOMP::DihedralQuadraticOMP(class LAMMPS *lmp) + : DihedralQuadratic(lmp), ThrOMP(lmp,THR_DIHEDRAL) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void DihedralQuadraticOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->ndihedrallist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template <int EVFLAG, int EFLAG, int NEWTON_BOND> +void DihedralQuadraticOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,i4,n,type; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; + double edihedral,f1[3],f2[3],f3[3],f4[3]; + double sb1,sb2,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2; + double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2; + double c2mag,sc1,sc2,s1,s12,c,p,pd,a,a11,a22; + double a33,a12,a13,a23,sx2,sy2,sz2; + double s2,cx,cy,cz,cmag,dx,phi,si,sin2; + + edihedral = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = dihedrallist[n][0]; + i2 = dihedrallist[n][1]; + i3 = dihedrallist[n][2]; + i4 = dihedrallist[n][3]; + type = dihedrallist[n][4]; + + // 1st bond + + vb1x = x[i1][0] - x[i2][0]; + vb1y = x[i1][1] - x[i2][1]; + vb1z = x[i1][2] - x[i2][2]; + + // 2nd bond + + vb2x = x[i3][0] - x[i2][0]; + vb2y = x[i3][1] - x[i2][1]; + vb2z = x[i3][2] - x[i2][2]; + + vb2xm = -vb2x; + vb2ym = -vb2y; + vb2zm = -vb2z; + + // 3rd bond + + vb3x = x[i4][0] - x[i3][0]; + vb3y = x[i4][1] - x[i3][1]; + vb3z = x[i4][2] - x[i3][2]; + + // c0 calculation + + sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z); + sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z); + sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z); + + rb1 = sqrt(sb1); + rb3 = sqrt(sb3); + + c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3; + + // 1st and 2nd angle + + b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z; + b1mag = sqrt(b1mag2); + b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z; + b2mag = sqrt(b2mag2); + b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z; + b3mag = sqrt(b3mag2); + + ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z; + r12c1 = 1.0 / (b1mag*b2mag); + c1mag = ctmp * r12c1; + + ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z; + r12c2 = 1.0 / (b2mag*b3mag); + c2mag = ctmp * r12c2; + + // cos and sin of 2 angles and final c + + sin2 = MAX(1.0 - c1mag*c1mag,0.0); + sc1 = sqrt(sin2); + if (sc1 < SMALL) sc1 = SMALL; + sc1 = 1.0/sc1; + + sin2 = MAX(1.0 - c2mag*c2mag,0.0); + sc2 = sqrt(sin2); + if (sc2 < SMALL) sc2 = SMALL; + sc2 = 1.0/sc2; + + s1 = sc1 * sc1; + s2 = sc2 * sc2; + s12 = sc1 * sc2; + c = (c0 + c1mag*c2mag) * s12; + + cx = vb1y*vb2z - vb1z*vb2y; + cy = vb1z*vb2x - vb1x*vb2z; + cz = vb1x*vb2y - vb1y*vb2x; + cmag = sqrt(cx*cx + cy*cy + cz*cz); + dx = (cx*vb3x + cy*vb3y + cz*vb3z)/cmag/b3mag; + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // force & energy + // p = k ( phi- phi0)^2 + // pd = dp/dc + + phi = acos(c); + if (dx < 0.0) phi *= -1.0; + si = sin(phi); + + double dphi = phi-phi0[type]; + p = k[type]*dphi; + if (fabs(si) < SMALLER) { + pd = - 2.0 * k[type]; + } else { + pd = - 2.0 * p / si; + } + p = p * dphi; + + if (EFLAG) edihedral = p; + + a = pd; + c = c * a; + s12 = s12 * a; + a11 = c*sb1*s1; + a22 = -sb2 * (2.0*c0*s12 - c*(s1+s2)); + a33 = c*sb3*s2; + a12 = -r12c1 * (c1mag*c*s1 + c2mag*s12); + a13 = -rb1*rb3*s12; + a23 = r12c2 * (c2mag*c*s2 + c1mag*s12); + + sx2 = a12*vb1x + a22*vb2x + a23*vb3x; + sy2 = a12*vb1y + a22*vb2y + a23*vb3y; + sz2 = a12*vb1z + a22*vb2z + a23*vb3z; + + f1[0] = a11*vb1x + a12*vb2x + a13*vb3x; + f1[1] = a11*vb1y + a12*vb2y + a13*vb3y; + f1[2] = a11*vb1z + a12*vb2z + a13*vb3z; + + f2[0] = -sx2 - f1[0]; + f2[1] = -sy2 - f1[1]; + f2[2] = -sz2 - f1[2]; + + f4[0] = a13*vb1x + a23*vb2x + a33*vb3x; + f4[1] = a13*vb1y + a23*vb2y + a33*vb3y; + f4[2] = a13*vb1z + a23*vb2z + a33*vb3z; + + f3[0] = sx2 - f4[0]; + f3[1] = sy2 - f4[1]; + f3[2] = sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f2[0]; + f[i2][1] += f2[1]; + f[i2][2] += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + } +} diff --git a/src/USER-OMP/dihedral_quadratic_omp.h b/src/USER-OMP/dihedral_quadratic_omp.h new file mode 100644 index 0000000000..7be6cbd8cb --- /dev/null +++ b/src/USER-OMP/dihedral_quadratic_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef DIHEDRAL_CLASS + +DihedralStyle(quadratic/omp,DihedralQuadraticOMP) + +#else + +#ifndef LMP_DIHEDRAL_QUADRATIC_OMP_H +#define LMP_DIHEDRAL_QUADRATIC_OMP_H + +#include "dihedral_quadratic.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class DihedralQuadraticOMP : public DihedralQuadratic, public ThrOMP { + + public: + DihedralQuadraticOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_BOND> + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/dihedral_table_omp.cpp b/src/USER-OMP/dihedral_table_omp.cpp index 55f5ffd97b..f368434144 100644 --- a/src/USER-OMP/dihedral_table_omp.cpp +++ b/src/USER-OMP/dihedral_table_omp.cpp @@ -15,7 +15,10 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ -#include "math.h" +#include <cmath> +#include <cstdlib> +#include <cstdio> + #include "dihedral_table_omp.h" #include "atom.h" #include "comm.h" @@ -25,12 +28,77 @@ #include "update.h" #include "error.h" +#include "math_const.h" +#include "math_extra.h" + #include "suffix.h" + using namespace LAMMPS_NS; -using namespace DIHEDRAL_TABLE_NS; +using namespace MathConst; +using namespace MathExtra; + #define TOLERANCE 0.05 #define SMALL 0.001 +// -------------------------------------------- +// ------- Calculate the dihedral angle ------- +// -------------------------------------------- +static const int g_dim=3; + +static double Phi(double const *x1, //array holding x,y,z coords atom 1 + double const *x2, // : : : : 2 + double const *x3, // : : : : 3 + double const *x4, // : : : : 4 + Domain *domain, //<-periodic boundary information + // The following arrays are of doubles with g_dim elements. + // (g_dim is a constant known at compile time, usually 3). + // Their contents is calculated by this function. + // Space for these vectors must be allocated in advance. + // (This is not hidden internally because these vectors + // may be needed outside the function, later on.) + double *vb12, // will store x2-x1 + double *vb23, // will store x3-x2 + double *vb34, // will store x4-x3 + double *n123, // will store normal to plane x1,x2,x3 + double *n234) // will store normal to plane x2,x3,x4 +{ + + for (int d=0; d < g_dim; ++d) { + vb12[d] = x2[d] - x1[d]; // 1st bond + vb23[d] = x3[d] - x2[d]; // 2nd bond + vb34[d] = x4[d] - x3[d]; // 3rd bond + } + + //Consider periodic boundary conditions: + domain->minimum_image(vb12[0],vb12[1],vb12[2]); + domain->minimum_image(vb23[0],vb23[1],vb23[2]); + domain->minimum_image(vb34[0],vb34[1],vb34[2]); + + //--- Compute the normal to the planes formed by atoms 1,2,3 and 2,3,4 --- + + cross3(vb23, vb12, n123); // <- n123=vb23 x vb12 + cross3(vb23, vb34, n234); // <- n234=vb23 x vb34 + + norm3(n123); + norm3(n234); + + double cos_phi = -dot3(n123, n234); + + if (cos_phi > 1.0) + cos_phi = 1.0; + else if (cos_phi < -1.0) + cos_phi = -1.0; + + double phi = acos(cos_phi); + + if (dot3(n123, vb34) > 0.0) { + phi = -phi; //(Note: Negative dihedral angles are possible only in 3-D.) + phi += MY_2PI; //<- This insure phi is always in the range 0 to 2*PI + } + return phi; +} // DihedralTable::Phi() + + /* ---------------------------------------------------------------------- */ DihedralTableOMP::DihedralTableOMP(class LAMMPS *lmp) @@ -178,9 +246,9 @@ void DihedralTableOMP::eval(int nfrom, int nto, ThrData * const thr) double dphi_dx3[g_dim]; // d x[i1][d] double dphi_dx4[g_dim]; //where d=0,1,2 corresponds to x,y,z (if g_dim==3) - double dot123 = DotProduct(vb12, vb23); - double dot234 = DotProduct(vb23, vb34); - double L23sqr = DotProduct(vb23, vb23); + double dot123 = dot3(vb12, vb23); + double dot234 = dot3(vb23, vb34); + double L23sqr = dot3(vb23, vb23); double L23 = sqrt(L23sqr); // (central bond length) double inv_L23sqr = 0.0; double inv_L23 = 0.0; @@ -200,15 +268,14 @@ void DihedralTableOMP::eval(int nfrom, int nto, ThrData * const thr) perp34on23[d] = vb34[d] - proj34on23[d]; } - // --- Compute the gradient vectors dphi/dx1 and dphi/dx4: --- // These two gradients point in the direction of n123 and n234, // and are scaled by the distances of atoms 1 and 4 from the central axis. // Distance of atom 1 to central axis: - double perp12on23_len = sqrt(DotProduct(perp12on23, perp12on23)); + double perp12on23_len = sqrt(dot3(perp12on23, perp12on23)); // Distance of atom 4 to central axis: - double perp34on23_len = sqrt(DotProduct(perp34on23, perp34on23)); + double perp34on23_len = sqrt(dot3(perp34on23, perp34on23)); double inv_perp12on23 = 0.0; if (perp12on23_len != 0.0) inv_perp12on23 = 1.0 / perp12on23_len; @@ -265,33 +332,10 @@ void DihedralTableOMP::eval(int nfrom, int nto, ThrData * const thr) } - - - #ifdef DIH_DEBUG_NUM - // ----- Numerical test? ----- - - cerr << " -- testing gradient for dihedral (n="<<n<<") for atoms (" - << i1 << "," << i2 << "," << i3 << "," << i4 << ") --" << endl; - - PrintGradientComparison(*this, dphi_dx1, dphi_dx2, dphi_dx3, dphi_dx4, - domain, x[i1], x[i2], x[i3], x[i4]); - - for (int d=0; d < g_dim; ++d) { - // The sum of all the gradients should be near 0. (translational symmetry) - cerr <<"sum_gradients["<<d<<"]="<<dphi_dx1[d]<<"+"<<dphi_dx2[d]<<"+"<<dphi_dx3[d]<<"+"<<dphi_dx4[d]<<"="<<dphi_dx1[d]+dphi_dx2[d]+dphi_dx3[d]+dphi_dx4[d]<<endl; - // These should sum to zero - assert(abs(dphi_dx1[d]+dphi_dx2[d]+dphi_dx3[d]+dphi_dx4[d]) < 0.0002/L23); - } - #endif // #ifdef DIH_DEBUG_NUM - - - - // ----- Step 3: Calculate the energy and force in the phi direction ----- // tabulated force & energy double u, m_du_dphi; //u = energy. m_du_dphi = "minus" du/dphi - assert((0.0 <= phi) && (phi <= TWOPI)); uf_lookup(type, phi, u, m_du_dphi); diff --git a/src/USER-OMP/improper_fourier_omp.cpp b/src/USER-OMP/improper_fourier_omp.cpp new file mode 100644 index 0000000000..10ba2aaf0e --- /dev/null +++ b/src/USER-OMP/improper_fourier_omp.cpp @@ -0,0 +1,282 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "improper_fourier_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +ImproperFourierOMP::ImproperFourierOMP(class LAMMPS *lmp) + : ImproperFourier(lmp), ThrOMP(lmp,THR_IMPROPER) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void ImproperFourierOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nimproperlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template <int EVFLAG, int EFLAG, int NEWTON_BOND> +void ImproperFourierOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,i4,n,type; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z; + + const double * const * const x = atom->x; + const int * const * const improperlist = neighbor->improperlist; + + for (n = nfrom; n < nto; n++) { + i1 = improperlist[n][0]; + i2 = improperlist[n][1]; + i3 = improperlist[n][2]; + i4 = improperlist[n][3]; + type = improperlist[n][4]; + + // 1st bond + + vb1x = x[i2][0] - x[i1][0]; + vb1y = x[i2][1] - x[i1][1]; + vb1z = x[i2][2] - x[i1][2]; + + // 2nd bond + + vb2x = x[i3][0] - x[i1][0]; + vb2y = x[i3][1] - x[i1][1]; + vb2z = x[i3][2] - x[i1][2]; + + // 3rd bond + + vb3x = x[i4][0] - x[i1][0]; + vb3y = x[i4][1] - x[i1][1]; + vb3z = x[i4][2] - x[i1][2]; + + add1_thr<EVFLAG,EFLAG,NEWTON_BOND>(i1,i2,i3,i4,type, + vb1x,vb1y,vb1z, + vb2x,vb2y,vb2z, + vb3x,vb3y,vb3z,thr); + if ( all[type] ) { + add1_thr<EVFLAG,EFLAG,NEWTON_BOND>(i1,i4,i2,i3,type, + vb3x,vb3y,vb3z, + vb1x,vb1y,vb1z, + vb2x,vb2y,vb2z,thr); + add1_thr<EVFLAG,EFLAG,NEWTON_BOND>(i1,i3,i4,i2,type, + vb2x,vb2y,vb2z, + vb3x,vb3y,vb3z, + vb1x,vb1y,vb1z,thr); + } + } +} + +template <int EVFLAG, int EFLAG, int NEWTON_BOND> +void ImproperFourierOMP::add1_thr(const int i1,const int i2, + const int i3,const int i4, + const int type, + const double &vb1x, + const double &vb1y, + const double &vb1z, + const double &vb2x, + const double &vb2y, + const double &vb2z, + const double &vb3x, + const double &vb3y, + const double &vb3z, + ThrData * const thr) +{ + double eimproper,f1[3],f2[3],f3[3],f4[3]; + double c,c2,a,s,projhfg,dhax,dhay,dhaz,dahx,dahy,dahz,cotphi; + double ax,ay,az,ra2,rh2,ra,rh,rar,rhr,arx,ary,arz,hrx,hry,hrz; + + double * const * const f = thr->get_f(); + const int nlocal = atom->nlocal; + + eimproper = 0.0; + + // c0 calculation + // A = vb1 X vb2 is perpendicular to IJK plane + + ax = vb1y*vb2z-vb1z*vb2y; + ay = vb1z*vb2x-vb1x*vb2z; + az = vb1x*vb2y-vb1y*vb2x; + ra2 = ax*ax+ay*ay+az*az; + rh2 = vb3x*vb3x+vb3y*vb3y+vb3z*vb3z; + ra = sqrt(ra2); + rh = sqrt(rh2); + if (ra < SMALL) ra = SMALL; + if (rh < SMALL) rh = SMALL; + + rar = 1/ra; + rhr = 1/rh; + arx = ax*rar; + ary = ay*rar; + arz = az*rar; + hrx = vb3x*rhr; + hry = vb3y*rhr; + hrz = vb3z*rhr; + + c = arx*hrx+ary*hry+arz*hrz; + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str, + "Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,atom->x[i1][0],atom->x[i1][1],atom->x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,atom->x[i2][0],atom->x[i2][1],atom->x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,atom->x[i3][0],atom->x[i3][1],atom->x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,atom->x[i4][0],atom->x[i4][1],atom->x[i4][2]); + } + } + + if (c > 1.0) s = 1.0; + if (c < -1.0) s = -1.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + cotphi = c/s; + + projhfg = (vb3x*vb1x+vb3y*vb1y+vb3z*vb1z) / + sqrt(vb1x*vb1x+vb1y*vb1y+vb1z*vb1z); + projhfg += (vb3x*vb2x+vb3y*vb2y+vb3z*vb2z) / + sqrt(vb2x*vb2x+vb2y*vb2y+vb2z*vb2z); + if (projhfg > 0.0) { + s *= -1.0; + cotphi *= -1.0; + } + + // force and energy + // E = k ( C0 + C1 cos(w) + C2 cos(w) + + c2 = 2.0*s*s-1.0; + if (EFLAG) eimproper = k[type]*(C0[type]+C1[type]*s+C2[type]*c2); + + // dhax = diffrence between H and A in X direction, etc + + a = k[type]*(C1[type]+4.0*C2[type]*s)*cotphi; + dhax = hrx-c*arx; + dhay = hry-c*ary; + dhaz = hrz-c*arz; + + dahx = arx-c*hrx; + dahy = ary-c*hry; + dahz = arz-c*hrz; + + f2[0] = (dhay*vb1z - dhaz*vb1y)*rar; + f2[1] = (dhaz*vb1x - dhax*vb1z)*rar; + f2[2] = (dhax*vb1y - dhay*vb1x)*rar; + + f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar; + f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar; + f3[2] = (-dhax*vb2y + dhay*vb2x)*rar; + + f4[0] = dahx*rhr; + f4[1] = dahy*rhr; + f4[2] = dahz*rhr; + + f1[0] = -(f2[0] + f3[0] + f4[0]); + f1[1] = -(f2[1] + f3[1] + f4[1]); + f1[2] = -(f2[2] + f3[2] + f4[2]); + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]*a; + f[i1][1] += f1[1]*a; + f[i1][2] += f1[2]*a; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f3[0]*a; + f[i2][1] += f3[1]*a; + f[i2][2] += f3[2]*a; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f2[0]*a; + f[i3][1] += f2[1]*a; + f[i3][2] += f2[2]*a; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]*a; + f[i4][1] += f4[1]*a; + f[i4][2] += f4[2]*a; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); +} diff --git a/src/USER-OMP/improper_fourier_omp.h b/src/USER-OMP/improper_fourier_omp.h new file mode 100644 index 0000000000..765554bcaf --- /dev/null +++ b/src/USER-OMP/improper_fourier_omp.h @@ -0,0 +1,53 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef IMPROPER_CLASS + +ImproperStyle(fourier/omp,ImproperFourierOMP) + +#else + +#ifndef LMP_IMPROPER_FOURIER_OMP_H +#define LMP_IMPROPER_FOURIER_OMP_H + +#include "improper_fourier.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class ImproperFourierOMP : public ImproperFourier, public ThrOMP { + + public: + ImproperFourierOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_BOND> + void eval(int ifrom, int ito, ThrData * const thr); + + template <int EVFLAG, int EFLAG, int NEWTON_BOND> + void add1_thr(const int,const int,const int,const int,const int, + const double &, const double &, const double &, + const double &, const double &, const double &, + const double &, const double &, const double &, + ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/improper_ring_omp.cpp b/src/USER-OMP/improper_ring_omp.cpp index 78b15a0453..ec94573e2e 100644 --- a/src/USER-OMP/improper_ring_omp.cpp +++ b/src/USER-OMP/improper_ring_omp.cpp @@ -24,9 +24,11 @@ #include "force.h" #include "update.h" #include "error.h" +#include "math_special.h" #include "suffix.h" using namespace LAMMPS_NS; +using namespace MathSpecial; #define TOLERANCE 0.05 #define SMALL 0.001 @@ -167,7 +169,7 @@ void ImproperRingOMP::eval(int nfrom, int nto, ThrData * const thr) /* Append the current angle to the sum of angle differences. */ angle_summer += (bend_angle[icomb] - chi[type]); } - if (EFLAG) eimproper = (1.0/6.0) *k[type] * pow(angle_summer,6.0); + if (EFLAG) eimproper = (1.0/6.0) *k[type] * powint(angle_summer,6); /* printf("The tags: %d-%d-%d-%d, of type %d .\n",atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4],type); // printf("The coordinates of the first: %f, %f, %f.\n", x[i1][0], x[i1][1], x[i1][2]); @@ -181,7 +183,7 @@ void ImproperRingOMP::eval(int nfrom, int nto, ThrData * const thr) /* Force calculation acting on all atoms. Calculate the derivatives of the potential. */ - angfac = k[type] * pow(angle_summer,5.0); + angfac = k[type] * powint(angle_summer,5); f1[0] = 0.0; f1[1] = 0.0; f1[2] = 0.0; f3[0] = 0.0; f3[1] = 0.0; f3[2] = 0.0; diff --git a/src/USER-OMP/msm_cg_omp.cpp b/src/USER-OMP/msm_cg_omp.cpp new file mode 100644 index 0000000000..5d56a42182 --- /dev/null +++ b/src/USER-OMP/msm_cg_omp.cpp @@ -0,0 +1,522 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) + Original MSM class by: Paul Crozier, Stan Moore, Stephen Bond, (all SNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" + +#include "atom.h" +#include "commgrid.h" +#include "domain.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "msm_cg_omp.h" + +#include "math_const.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define OFFSET 16384 +#define SMALLQ 0.00001 + +enum{REVERSE_RHO,REVERSE_AD,REVERSE_AD_PERATOM}; +enum{FORWARD_RHO,FORWARD_AD,FORWARD_AD_PERATOM}; + +/* ---------------------------------------------------------------------- */ + +MSMCGOMP::MSMCGOMP(LAMMPS *lmp, int narg, char **arg) : MSMOMP(lmp, narg, arg) +{ + if ((narg < 1) || (narg > 2)) + error->all(FLERR,"Illegal kspace_style msm/cg/omp command"); + + if (narg == 2) + smallq = atof(arg[1]); + else + smallq = SMALLQ; + + num_charged = -1; + is_charged = NULL; +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +MSMCGOMP::~MSMCGOMP() +{ + memory->destroy(is_charged); +} + + +/* ---------------------------------------------------------------------- + compute the MSM long-range force, energy, virial +------------------------------------------------------------------------- */ + +void MSMCGOMP::compute(int eflag, int vflag) +{ + const double * const q = atom->q; + const int nlocal = atom->nlocal; + int i,j,n; + + // set energy/virial flags + // invoke allocate_peratom() if needed for first time + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = eflag_either = vflag_either = 0; + + if (vflag_atom && !peratom_allocate_flag) { + allocate_peratom(); + for (n=0; n<levels; n++) { + cg_peratom[n]->ghost_notify(); + cg_peratom[n]->setup(); + } + peratom_allocate_flag = 1; + } + + // extend size of per-atom arrays if necessary + + if (nlocal > nmax) { + memory->destroy(part2grid); + memory->destroy(is_charged); + nmax = atom->nmax; + memory->create(part2grid,nmax,3,"msm:part2grid"); + memory->create(is_charged,nmax,"msm/cg:is_charged"); + } + + // one time setup message + + if (num_charged < 0) { + bigint charged_all, charged_num; + double charged_frac, charged_fmax, charged_fmin; + + num_charged=0; + for (i=0; i < nlocal; ++i) + if (fabs(q[i]) > smallq) + ++num_charged; + + // get fraction of charged particles per domain + + if (nlocal > 0) + charged_frac = static_cast<double>(num_charged) * 100.0 + / static_cast<double>(nlocal); + else + charged_frac = 0.0; + + MPI_Reduce(&charged_frac,&charged_fmax,1,MPI_DOUBLE,MPI_MAX,0,world); + MPI_Reduce(&charged_frac,&charged_fmin,1,MPI_DOUBLE,MPI_MIN,0,world); + + // get fraction of charged particles overall + + charged_num = num_charged; + MPI_Reduce(&charged_num,&charged_all,1,MPI_LMP_BIGINT,MPI_SUM,0,world); + charged_frac = static_cast<double>(charged_all) * 100.0 + / static_cast<double>(atom->natoms); + + if (me == 0) { + if (screen) + fprintf(screen, + " MSM/cg optimization cutoff: %g\n" + " Total charged atoms: %.1f%%\n" + " Min/max charged atoms/proc: %.1f%% %.1f%%\n", + smallq,charged_frac,charged_fmin,charged_fmax); + if (logfile) + fprintf(logfile, + " MSM/cg optimization cutoff: %g\n" + " Total charged atoms: %.1f%%\n" + " Min/max charged atoms/proc: %.1f%% %.1f%%\n", + smallq,charged_frac,charged_fmin,charged_fmax); + } + } + + num_charged = 0; + for (i = 0; i < nlocal; ++i) + if (fabs(q[i]) > smallq) { + is_charged[num_charged] = i; + ++num_charged; + } + + // find grid points for all my particles + // map my particle charge onto my local 3d density grid (aninterpolation) + + particle_map(); + make_rho(); + + current_level = 0; + cg[0]->reverse_comm(this,REVERSE_RHO); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + + for (n=0; n<=levels-2; n++) { + current_level = n; + cg[n]->forward_comm(this,FORWARD_RHO); + + direct(n); + restriction(n); + } + + // top grid level + + current_level = levels-1; + cg[levels-1]->forward_comm(this,FORWARD_RHO); + direct_top(levels-1); + + for (n=levels-2; n>=0; n--) { + + prolongation(n); + + current_level = n; + cg[n]->reverse_comm(this,REVERSE_AD); + + // extra per-atom virial communication + + if (vflag_atom) + cg_peratom[n]->reverse_comm(this,REVERSE_AD_PERATOM); + } + + // all procs communicate E-field values + // to fill ghost cells surrounding their 3d bricks + + current_level = 0; + + cg[0]->forward_comm(this,FORWARD_AD); + + // extra per-atom energy/virial communication + + if (vflag_atom) + cg_peratom[0]->forward_comm(this,FORWARD_AD_PERATOM); + + // calculate the force on my particles (interpolation) + + fieldforce(); + + // calculate the per-atom energy for my particles + + if (evflag_atom) fieldforce_peratom(); + + const double qscale = force->qqrd2e * scale; + + // Total long-range energy + + if (eflag_global) { + double energy_all; + MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy = energy_all; + + double e_self = qsqsum*gamma(0.0)/cutoff; // Self-energy term + energy -= e_self; + energy *= 0.5*qscale; + } + + // Total long-range virial + + if (vflag_global) { + double virial_all[6]; + MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*virial_all[i]; + } + + // per-atom energy/virial + // energy includes self-energy correction + + if (evflag_atom) { + const double qs = 0.5*qscale; + + if (eflag_atom) { + const double sf = gamma(0.0)/cutoff; + for (j = 0; j < num_charged; j++) { + i = is_charged[j]; + eatom[i] -= q[i]*q[i]*sf; + eatom[i] *= qs; + } + } + + if (vflag_atom) { + for (n = 0; n < num_charged; n++) { + i = is_charged[n]; + for (j = 0; j < 6; j++) + vatom[i][j] *= qs; + } + } + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + + +/* ---------------------------------------------------------------------- + find center grid pt for each of my particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +void MSMCGOMP::particle_map() +{ + const double * const * const x = atom->x; + + int flag = 0; + int i; + + // XXX: O(N). is it worth to add OpenMP here? + for (int j = 0; j < num_charged; j++) { + i = is_charged[j]; + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + const int nx=static_cast<int>((x[i][0]-boxlo[0])*delxinv[0]+OFFSET)-OFFSET; + const int ny=static_cast<int>((x[i][1]-boxlo[1])*delyinv[0]+OFFSET)-OFFSET; + const int nz=static_cast<int>((x[i][2]-boxlo[2])*delzinv[0]+OFFSET)-OFFSET; + + part2grid[i][0] = nx; + part2grid[i][1] = ny; + part2grid[i][2] = nz; + + // check that entire stencil around nx,ny,nz will fit in my 3d brick + + if (nx+nlower < nxlo_out[0] || nx+nupper > nxhi_out[0] || + ny+nlower < nylo_out[0] || ny+nupper > nyhi_out[0] || + nz+nlower < nzlo_out[0] || nz+nupper > nzhi_out[0]) + flag = 1; + } + + if (flag) error->one(FLERR,"Out of range atoms - cannot compute MSM"); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void MSMCGOMP::make_rho() +{ + const double * const q = atom->q; + const double * const * const x = atom->x; + + // clear 3d density array + + double * const * const * const qgridn = qgrid[0]; + + memset(&(qgridn[nzlo_out[0]][nylo_out[0]][nxlo_out[0]]),0,ngrid[0]*sizeof(double)); + + double dx,dy,dz,x0,y0,z0; + int i,j,l,m,n,nx,ny,nz,mx,my,mz; + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + for (j = 0; j < num_charged; j++) { + i = is_charged[j]; + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx - (x[i][0]-boxlo[0])*delxinv[0]; + dy = ny - (x[i][1]-boxlo[1])*delyinv[0]; + dz = nz - (x[i][2]-boxlo[2])*delzinv[0]; + + compute_phis_and_dphis(dx,dy,dz); + + z0 = q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*phi1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*phi1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + qgridn[mz][my][mx] += x0*phi1d[0][l]; + } + } + } + } + +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get force on my particles +------------------------------------------------------------------------- */ + +void MSMCGOMP::fieldforce() +{ + + const double * const * const * const egridn = egrid[0]; + const double * const * const x = atom->x; + double * const * const f = atom->f; + const double * const q = atom->q; + + int i,j,l,m,n,nx,ny,nz,mx,my,mz; + double dx,dy,dz; + double phi_x,phi_y,phi_z; + double dphi_x,dphi_y,dphi_z; + double ekx,eky,ekz; + + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + for (j = 0; j < num_charged; j++) { + i = is_charged[j]; + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx - (x[i][0]-boxlo[0])*delxinv[0]; + dy = ny - (x[i][1]-boxlo[1])*delyinv[0]; + dz = nz - (x[i][2]-boxlo[2])*delzinv[0]; + + compute_phis_and_dphis(dx,dy,dz); + + ekx = eky = ekz = 0.0; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + phi_z = phi1d[2][n]; + dphi_z = dphi1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + phi_y = phi1d[1][m]; + dphi_y = dphi1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + phi_x = phi1d[0][l]; + dphi_x = dphi1d[0][l]; + ekx += dphi_x*phi_y*phi_z*egridn[mz][my][mx]; + eky += phi_x*dphi_y*phi_z*egridn[mz][my][mx]; + ekz += phi_x*phi_y*dphi_z*egridn[mz][my][mx]; + } + } + } + + ekx *= delxinv[0]; + eky *= delyinv[0]; + ekz *= delzinv[0]; + + // convert E-field to force + + const double qfactor = force->qqrd2e*scale*q[i]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + f[i][2] += qfactor*ekz; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial +------------------------------------------------------------------------- */ + +void MSMCGOMP::fieldforce_peratom() +{ + const double * const q = atom->q; + const double * const * const x = atom->x; + + double ***egridn = egrid[0]; + + double ***v0gridn = v0grid[0]; + double ***v1gridn = v1grid[0]; + double ***v2gridn = v2grid[0]; + double ***v3gridn = v3grid[0]; + double ***v4gridn = v4grid[0]; + double ***v5gridn = v5grid[0]; + + int i,j,l,m,n,nx,ny,nz,mx,my,mz; + double dx,dy,dz,x0,y0,z0; + double u,v0,v1,v2,v3,v4,v5; + + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + for (j = 0; j < num_charged; j++) { + i = is_charged[j]; + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx - (x[i][0]-boxlo[0])*delxinv[0]; + dy = ny - (x[i][1]-boxlo[1])*delyinv[0]; + dz = nz - (x[i][2]-boxlo[2])*delzinv[0]; + + compute_phis_and_dphis(dx,dy,dz); + + u = v0 = v1 = v2 = v3 = v4 = v5 = 0.0; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = phi1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*phi1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*phi1d[0][l]; + if (eflag_atom) u += x0*egridn[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0gridn[mz][my][mx]; + v1 += x0*v1gridn[mz][my][mx]; + v2 += x0*v2gridn[mz][my][mx]; + v3 += x0*v3gridn[mz][my][mx]; + v4 += x0*v4gridn[mz][my][mx]; + v5 += x0*v5gridn[mz][my][mx]; + } + } + } + } + + if (eflag_atom) eatom[i] += q[i]*u; + if (vflag_atom) { + vatom[i][0] += q[i]*v0; + vatom[i][1] += q[i]*v1; + vatom[i][2] += q[i]*v2; + vatom[i][3] += q[i]*v3; + vatom[i][4] += q[i]*v4; + vatom[i][5] += q[i]*v5; + } + } +} + + +double MSMCGOMP::memory_usage() +{ + double bytes = MSM::memory_usage(); + bytes += nmax * sizeof(int); + return bytes; +} diff --git a/src/USER-OMP/msm_cg_omp.h b/src/USER-OMP/msm_cg_omp.h new file mode 100644 index 0000000000..1b6472afaa --- /dev/null +++ b/src/USER-OMP/msm_cg_omp.h @@ -0,0 +1,141 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(msm/cg/omp,MSMCGOMP) + +#else + +#ifndef LMP_MSM_CG_OMP_H +#define LMP_MSM_CG_OMP_H + +#include "msm_omp.h" + +namespace LAMMPS_NS { + +class MSMCGOMP : public MSMOMP { + public: + MSMCGOMP(class LAMMPS *, int, char **); + virtual ~MSMCGOMP(); + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + int num_charged; + int *is_charged; + double smallq; + + protected: + virtual void particle_map(); + virtual void make_rho(); + virtual void fieldforce(); + virtual void fieldforce_peratom(); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Cannot (yet) use MSM with triclinic box + +This feature is not yet supported. + +E: Cannot (yet) use MSM with 2d simulation + +This feature is not yet supported. + +E: Kspace style requires atom attribute q + +The atom style defined does not have these attributes. + +E: Cannot use slab correction with MSM + +Slab correction can only be used with Ewald and PPPM, not MSM. + +E: MSM order must be 4, 6, 8, or 10 + +This is a limitation of the MSM implementation in LAMMPS: +the MSM order can only be 4, 6, 8, or 10. + +E: Cannot (yet) use single precision with MSM (remove -DFFT_SINGLE from Makefile and recompile) + +Single precision cannot be used with MSM. + +E: KSpace style is incompatible with Pair style + +Setting a kspace style requires that a pair style with a long-range +Coulombic component be selected that is compatible with MSM. Note +that TIP4P is not (yet) supported by MSM. + +E: Cannot use kspace solver on system with no charge + +No atoms in system have a non-zero charge. + +E: System is not charge neutral, net charge = %g + +The total charge on all atoms on the system is not 0.0, which +is not valid for MSM. + +E: MSM grid is too large + +The global MSM grid is larger than OFFSET in one or more dimensions. +OFFSET is currently set to 16384. You likely need to decrease the +requested accuracy. + +W: MSM mesh too small, increasing to 2 points in each direction + +The global MSM grid is too small, so the number of grid points has been +increased + +E: KSpace accuracy must be > 0 + +The kspace accuracy designated in the input must be greater than zero. + +W: Number of MSM mesh points increased to be a multiple of 2 + +MSM requires that the number of grid points in each direction be a multiple +of two and the number of grid points in one or more directions have been +adjusted to meet this requirement. + +W: Adjusting Coulombic cutoff for MSM, new cutoff = %g + +The adjust/cutoff command is turned on and the Coulombic cutoff has been +adjusted to match the user-specified accuracy. + +E: Out of range atoms - cannot compute MSM + +One or more atoms are attempting to map their charge to a MSM grid point +that is not owned by a processor. This is likely for one of two +reasons, both of them bad. First, it may mean that an atom near the +boundary of a processor's sub-domain has moved more than 1/2 the +"neighbor skin distance"_neighbor.html without neighbor lists being +rebuilt and atoms being migrated to new processors. This also means +you may be missing pairwise interactions that need to be computed. +The solution is to change the re-neighboring criteria via the +"neigh_modify"_neigh_modify command. The safest settings are "delay 0 +every 1 check yes". Second, it may mean that an atom has moved far +outside a processor's sub-domain or even the entire simulation box. +This indicates bad physics, e.g. due to highly overlapping atoms, too +large a timestep, etc. + +*/ diff --git a/src/USER-OMP/pair_airebo_omp.cpp b/src/USER-OMP/pair_airebo_omp.cpp index 5000ea2a09..8ab26e9bf7 100644 --- a/src/USER-OMP/pair_airebo_omp.cpp +++ b/src/USER-OMP/pair_airebo_omp.cpp @@ -19,6 +19,7 @@ #include "error.h" #include "force.h" #include "memory.h" +#include "math_special.h" #include "neighbor.h" #include "neigh_list.h" @@ -28,6 +29,7 @@ #include "suffix.h" using namespace LAMMPS_NS; +using namespace MathSpecial; #define TOL 1.0e-9 @@ -2442,7 +2444,7 @@ void PairAIREBOOMP::TORSION_thr(int ifrom, int ito, cw2 = (.5*(1.0-cw)); ekijl = epsilonT[ktype][ltype]; Ec = 256.0*ekijl/405.0; - Vtors = (Ec*(pow(cw2,5.0)))-(ekijl/10.0); + Vtors = (Ec*(powint(cw2,5)))-(ekijl/10.0); if (eflag) evdwl = Vtors*w21*w23*w34*(1.0-tspjik)*(1.0-tspijl); @@ -2496,7 +2498,7 @@ void PairAIREBOOMP::TORSION_thr(int ifrom, int ito, ddndil = cross321mag*dxjdil; dcwddn = -cwnum/(cwnom*cwnom); dcwdn = 1.0/cwnom; - dvpdcw = (-1.0)*Ec*(-.5)*5.0*pow(cw2,4.0) * + dvpdcw = (-1.0)*Ec*(-.5)*5.0*powint(cw2,4) * w23*w21*w34*(1.0-tspjik)*(1.0-tspijl); Ftmp[0] = dvpdcw*((dcwdn*dndij[0])+(dcwddn*ddndij*del23[0]/r23)); diff --git a/src/USER-OMP/pair_beck_omp.cpp b/src/USER-OMP/pair_beck_omp.cpp index 9bec6c0db2..50c27862d1 100644 --- a/src/USER-OMP/pair_beck_omp.cpp +++ b/src/USER-OMP/pair_beck_omp.cpp @@ -19,9 +19,11 @@ #include "force.h" #include "neighbor.h" #include "neigh_list.h" +#include "math_special.h" #include "suffix.h" using namespace LAMMPS_NS; +using namespace MathSpecial; /* ---------------------------------------------------------------------- */ @@ -126,7 +128,7 @@ void PairBeckOMP::eval(int iifrom, int iito, ThrData * const thr) alphaij = alpha[itype][jtype]; betaij = beta[itype][jtype]; term1 = aaij*aaij + rsq; - term2 = 1.0/pow(term1,5.0); + term2 = powint(term1,-5); term3 = 21.672 + 30.0*aaij*aaij + 6.0*rsq; term4 = alphaij + r5*betaij; term5 = alphaij + 6.0*r5*betaij; @@ -146,7 +148,7 @@ void PairBeckOMP::eval(int iifrom, int iito, ThrData * const thr) } if (EFLAG) { - term6 = 1.0/pow(term1,3.0); + term6 = powint(term1,-3); term1inv = 1.0/term1; evdwl = AA[itype][jtype]*exp(-1.0*r*term4); evdwl -= BB[itype][jtype]*term6*(1.0+(2.709+3.0*aaij*aaij)*term1inv); diff --git a/src/USER-OMP/pair_brownian_omp.cpp b/src/USER-OMP/pair_brownian_omp.cpp index 20cc068809..15a2c25491 100644 --- a/src/USER-OMP/pair_brownian_omp.cpp +++ b/src/USER-OMP/pair_brownian_omp.cpp @@ -25,12 +25,14 @@ #include "variable.h" #include "random_mars.h" #include "math_const.h" +#include "math_special.h" #include "fix_wall.h" #include "suffix.h" using namespace LAMMPS_NS; using namespace MathConst; +using namespace MathSpecial; #define EPSILON 1.0e-10 @@ -105,11 +107,11 @@ void PairBrownianOMP::compute(int eflag, int vflag) double vol_f = vol_P/vol_T; if (flaglog == 0) { R0 = 6*MY_PI*mu*rad*(1.0 + 2.16*vol_f); - RT0 = 8*MY_PI*mu*pow(rad,3.0); + RT0 = 8*MY_PI*mu*cube(rad); //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f); } else { R0 = 6*MY_PI*mu*rad*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f); - RT0 = 8*MY_PI*mu*pow(rad,3.0)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f); + RT0 = 8*MY_PI*mu*cube(rad)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f); //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f); } } @@ -254,7 +256,7 @@ void PairBrownianOMP::eval(int iifrom, int iito, ThrData * const thr) if (FLAGLOG) { a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep + 9.0/40.0*log(1.0/h_sep)); a_sh = 6.0*MY_PI*mu*radi*(1.0/6.0*log(1.0/h_sep)); - a_pu = 8.0*MY_PI*mu*pow(radi,3.0)*(3.0/160.0*log(1.0/h_sep)); + a_pu = 8.0*MY_PI*mu*cube(radi)*(3.0/160.0*log(1.0/h_sep)); } else a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep); diff --git a/src/USER-OMP/pair_brownian_poly_omp.cpp b/src/USER-OMP/pair_brownian_poly_omp.cpp index 0378989fbc..09a7ef2e59 100644 --- a/src/USER-OMP/pair_brownian_poly_omp.cpp +++ b/src/USER-OMP/pair_brownian_poly_omp.cpp @@ -27,10 +27,12 @@ #include "fix_wall.h" #include "math_const.h" +#include "math_special.h" #include "suffix.h" using namespace LAMMPS_NS; using namespace MathConst; +using namespace MathSpecial; #define EPSILON 1.0e-10 @@ -105,11 +107,11 @@ void PairBrownianPolyOMP::compute(int eflag, int vflag) double vol_f = vol_P/vol_T; if (flaglog == 0) { R0 = 6*MY_PI*mu*rad*(1.0 + 2.16*vol_f); - RT0 = 8*MY_PI*mu*pow(rad,3.0); + RT0 = 8*MY_PI*mu*cube(rad); //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f); } else { R0 = 6*MY_PI*mu*rad*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f); - RT0 = 8*MY_PI*mu*pow(rad,3.0)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f); + RT0 = 8*MY_PI*mu*cube(rad)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f); //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f); } } @@ -250,20 +252,20 @@ void PairBrownianPolyOMP::eval(int iifrom, int iito, ThrData * const thr) if (FLAGLOG) { a_sq = beta0*beta0/beta1/beta1/h_sep + - (1.0+7.0*beta0+beta0*beta0)/5.0/pow(beta1,3.0)*log(1.0/h_sep); - a_sq += (1.0+18.0*beta0-29.0*beta0*beta0+18.0*pow(beta0,3.0) + - pow(beta0,4.0))/21.0/pow(beta1,4.0)*h_sep*log(1.0/h_sep); + (1.0+7.0*beta0+beta0*beta0)/5.0/cube(beta1)*log(1.0/h_sep); + a_sq += (1.0+18.0*beta0-29.0*beta0*beta0+18.0*cube(beta0) + + powint(beta0,4))/21.0/powint(beta1,4)*h_sep*log(1.0/h_sep); a_sq *= 6.0*MY_PI*mu*radi; - a_sh = 4.0*beta0*(2.0+beta0+2.0*beta0*beta0)/15.0/pow(beta1,3.0) * + a_sh = 4.0*beta0*(2.0+beta0+2.0*beta0*beta0)/15.0/cube(beta1) * log(1.0/h_sep); - a_sh += 4.0*(16.0-45.0*beta0+58.0*beta0*beta0-45.0*pow(beta0,3.0) + - 16.0*pow(beta0,4.0))/375.0/pow(beta1,4.0) * + a_sh += 4.0*(16.0-45.0*beta0+58.0*beta0*beta0-45.0*cube(beta0) + + 16.0*powint(beta0,4))/375.0/powint(beta1,4) * h_sep*log(1.0/h_sep); a_sh *= 6.0*MY_PI*mu*radi; a_pu = beta0*(4.0+beta0)/10.0/beta1/beta1*log(1.0/h_sep); a_pu += (32.0-33.0*beta0+83.0*beta0*beta0+43.0 * - pow(beta0,3.0))/250.0/pow(beta1,3.0)*h_sep*log(1.0/h_sep); - a_pu *= 8.0*MY_PI*mu*pow(radi,3.0); + cube(beta0))/250.0/cube(beta1)*h_sep*log(1.0/h_sep); + a_pu *= 8.0*MY_PI*mu*cube(radi); } else a_sq = 6.0*MY_PI*mu*radi*(beta0*beta0/beta1/beta1/h_sep); diff --git a/src/USER-OMP/pair_colloid_omp.cpp b/src/USER-OMP/pair_colloid_omp.cpp index e2f5c56d99..e27ebb7b91 100644 --- a/src/USER-OMP/pair_colloid_omp.cpp +++ b/src/USER-OMP/pair_colloid_omp.cpp @@ -20,9 +20,11 @@ #include "force.h" #include "neighbor.h" #include "neigh_list.h" +#include "math_special.h" #include "suffix.h" using namespace LAMMPS_NS; +using namespace MathSpecial; /* ---------------------------------------------------------------------- */ @@ -169,10 +171,10 @@ void PairColloidOMP::eval(int iifrom, int iito, ThrData * const thr) K[6] = K[2]-r; K[7] = 1.0/(K[3]*K[4]); K[8] = 1.0/(K[5]*K[6]); - g[0] = pow(K[3],-7.0); - g[1] = pow(K[4],-7.0); - g[2] = pow(K[5],-7.0); - g[3] = pow(K[6],-7.0); + g[0] = powint(K[3],-7); + g[1] = powint(K[4],-7); + g[2] = powint(K[5],-7); + g[3] = powint(K[6],-7); h[0] = ((K[3]+5.0*K[1])*K[3]+30.0*K[0])*g[0]; h[1] = ((K[4]+5.0*K[1])*K[4]+30.0*K[0])*g[1]; h[2] = ((K[5]+5.0*K[2])*K[5]-30.0*K[0])*g[2]; diff --git a/src/USER-OMP/pair_dipole_sf_omp.cpp b/src/USER-OMP/pair_dipole_sf_omp.cpp index 4090bc3c02..f0157a4536 100644 --- a/src/USER-OMP/pair_dipole_sf_omp.cpp +++ b/src/USER-OMP/pair_dipole_sf_omp.cpp @@ -274,8 +274,9 @@ void PairDipoleSFOMP::eval(int iifrom, int iito, ThrData * const thr) if (EFLAG) { if (rsq < cut_coulsq[itype][jtype]) { - ecoul = qtmp * q[j] * rinv * - pow((1.0-sqrt(rsq)/sqrt(cut_coulsq[itype][jtype])),2.0); + ecoul = (1.0-sqrt(rsq)/sqrt(cut_coulsq[itype][jtype])); + ecoul *= ecoul; + ecoul *= qtmp * q[j] * rinv; if (mu[i][3] > 0.0 && mu[j][3] > 0.0) ecoul += bfac * (r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr); if (mu[i][3] > 0.0 && q[j] != 0.0) diff --git a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp index 8e4b8a7a90..25e094bc7a 100644 --- a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp +++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp @@ -22,10 +22,12 @@ #include "neigh_list.h" #include "math_const.h" +#include "math_special.h" #include "suffix.h" using namespace LAMMPS_NS; using namespace MathConst; +using namespace MathSpecial; #define SMALL 0.001 @@ -119,7 +121,6 @@ void PairHbondDreidingLJOMP::eval(int iifrom, int iito, ThrData * const thr) double r2inv,r10inv; double switch1,switch2; int *ilist,*jlist,*numneigh,**firstneigh; - Param *pm; evdwl = 0.0; @@ -178,9 +179,9 @@ void PairHbondDreidingLJOMP::eval(int iifrom, int iito, ThrData * const thr) ktype = type[k]; m = type2param[itype][jtype][ktype]; if (m < 0) continue; - pm = ¶ms[m]; + const Param &pm = params[m]; - if (rsq < pm->cut_outersq) { + if (rsq < pm.cut_outersq) { delr1[0] = xtmp - x[k][0]; delr1[1] = ytmp - x[k][1]; delr1[2] = ztmp - x[k][2]; @@ -203,7 +204,7 @@ void PairHbondDreidingLJOMP::eval(int iifrom, int iito, ThrData * const thr) if (c < -1.0) c = -1.0; ac = acos(c); - if (ac > pm->cut_angle && ac < (2.0*MY_PI - pm->cut_angle)) { + if (ac > pm.cut_angle && ac < (2.0*MY_PI - pm.cut_angle)) { s = sqrt(1.0 - c*c); if (s < SMALL) s = SMALL; @@ -211,24 +212,24 @@ void PairHbondDreidingLJOMP::eval(int iifrom, int iito, ThrData * const thr) r2inv = 1.0/rsq; r10inv = r2inv*r2inv*r2inv*r2inv*r2inv; - force_kernel = r10inv*(pm->lj1*r2inv - pm->lj2)*r2inv * - pow(c,(double)pm->ap); - force_angle = pm->ap * r10inv*(pm->lj3*r2inv - pm->lj4) * - pow(c,pm->ap-1.0)*s; - - eng_lj = r10inv*(pm->lj3*r2inv - pm->lj4); - if (rsq > pm->cut_innersq) { - switch1 = (pm->cut_outersq-rsq) * (pm->cut_outersq-rsq) * - (pm->cut_outersq + 2.0*rsq - 3.0*pm->cut_innersq) / - pm->denom_vdw; - switch2 = 12.0*rsq * (pm->cut_outersq-rsq) * - (rsq-pm->cut_innersq) / pm->denom_vdw; + force_kernel = r10inv*(pm.lj1*r2inv - pm.lj2)*r2inv * + powint(c,pm.ap); + force_angle = pm.ap * r10inv*(pm.lj3*r2inv - pm.lj4) * + powint(c,pm.ap-1)*s; + + eng_lj = r10inv*(pm.lj3*r2inv - pm.lj4); + if (rsq > pm.cut_innersq) { + switch1 = (pm.cut_outersq-rsq) * (pm.cut_outersq-rsq) * + (pm.cut_outersq + 2.0*rsq - 3.0*pm.cut_innersq) / + pm.denom_vdw; + switch2 = 12.0*rsq * (pm.cut_outersq-rsq) * + (rsq-pm.cut_innersq) / pm.denom_vdw; force_kernel = force_kernel*switch1 + eng_lj*switch2; eng_lj *= switch1; } if (EFLAG) { - evdwl = eng_lj * pow(c,(double)pm->ap); + evdwl = eng_lj * powint(c,pm.ap); evdwl *= factor_hb; } diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp index 546d73d2ad..d96d44b303 100644 --- a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp +++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp @@ -22,10 +22,12 @@ #include "neigh_list.h" #include "math_const.h" +#include "math_special.h" #include "suffix.h" using namespace LAMMPS_NS; using namespace MathConst; +using namespace MathSpecial; #define SMALL 0.001 @@ -118,7 +120,6 @@ void PairHbondDreidingMorseOMP::eval(int iifrom, int iito, ThrData * const thr) double fi[3],fj[3],delr1[3],delr2[3]; double r,dr,dexp,eng_morse,switch1,switch2; int *ilist,*jlist,*numneigh,**firstneigh; - Param *pm; evdwl = 0.0; @@ -177,9 +178,9 @@ void PairHbondDreidingMorseOMP::eval(int iifrom, int iito, ThrData * const thr) ktype = type[k]; m = type2param[itype][jtype][ktype]; if (m < 0) continue; - pm = ¶ms[m]; + const Param &pm = params[m]; - if (rsq < pm->cut_outersq) { + if (rsq < pm.cut_outersq) { delr1[0] = xtmp - x[k][0]; delr1[1] = ytmp - x[k][1]; delr1[2] = ztmp - x[k][2]; @@ -202,31 +203,31 @@ void PairHbondDreidingMorseOMP::eval(int iifrom, int iito, ThrData * const thr) if (c < -1.0) c = -1.0; ac = acos(c); - if (ac > pm->cut_angle && ac < (2.0*MY_PI - pm->cut_angle)) { + if (ac > pm.cut_angle && ac < (2.0*MY_PI - pm.cut_angle)) { s = sqrt(1.0 - c*c); if (s < SMALL) s = SMALL; // Morse-specific kernel r = sqrt(rsq); - dr = r - pm->r0; - dexp = exp(-pm->alpha * dr); - eng_morse = pm->d0 * (dexp*dexp - 2.0*dexp); - force_kernel = pm->morse1*(dexp*dexp - dexp)/r * pow(c,(double)pm->ap); - force_angle = pm->ap * eng_morse * pow(c,(double)pm->ap-1.0)*s; - - if (rsq > pm->cut_innersq) { - switch1 = (pm->cut_outersq-rsq) * (pm->cut_outersq-rsq) * - (pm->cut_outersq + 2.0*rsq - 3.0*pm->cut_innersq) / - pm->denom_vdw; - switch2 = 12.0*rsq * (pm->cut_outersq-rsq) * - (rsq-pm->cut_innersq) / pm->denom_vdw; + dr = r - pm.r0; + dexp = exp(-pm.alpha * dr); + eng_morse = pm.d0 * (dexp*dexp - 2.0*dexp); + force_kernel = pm.morse1*(dexp*dexp - dexp)/r * powint(c,pm.ap); + force_angle = pm.ap * eng_morse * powint(c,pm.ap-1)*s; + + if (rsq > pm.cut_innersq) { + switch1 = (pm.cut_outersq-rsq) * (pm.cut_outersq-rsq) * + (pm.cut_outersq + 2.0*rsq - 3.0*pm.cut_innersq) / + pm.denom_vdw; + switch2 = 12.0*rsq * (pm.cut_outersq-rsq) * + (rsq-pm.cut_innersq) / pm.denom_vdw; force_kernel = force_kernel*switch1 + eng_morse*switch2; eng_morse *= switch1; } if (EFLAG) { - evdwl = eng_morse * pow(c,(double)params[m].ap); + evdwl = eng_morse * powint(c,pm.ap); evdwl *= factor_hb; } diff --git a/src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp b/src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp new file mode 100644 index 0000000000..b46359e0d9 --- /dev/null +++ b/src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp @@ -0,0 +1,229 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) + This style is a simplified re-implementation of the CG/CMM pair style +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_sdk_coul_msm_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "kspace.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "lj_sdk_common.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace LJSDKParms; +/* ---------------------------------------------------------------------- */ + +PairLJSDKCoulMSMOMP::PairLJSDKCoulMSMOMP(LAMMPS *lmp) : + PairLJSDKCoulMSM(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJSDKCoulMSMOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_msm_thr<1,1,1>(ifrom, ito, thr); + else eval_msm_thr<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_msm_thr<1,0,1>(ifrom, ito, thr); + else eval_msm_thr<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_msm_thr<0,0,1>(ifrom, ito, thr); + else eval_msm_thr<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template <int EVFLAG, int EFLAG, int NEWTON_PAIR> +void PairLJSDKCoulMSMOMP::eval_msm_thr(int iifrom, int iito, ThrData * const thr) +{ + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + + const int * const ilist = list->ilist; + const int * const numneigh = list->numneigh; + const int * const * const firstneigh = list->firstneigh; + const int nlocal = atom->nlocal; + + // loop over neighbors of my atoms + + for (int ii = iifrom; ii < iito; ++ii) { + + const int i = ilist[ii]; + const int itype = type[i]; + const double qtmp = q[i]; + const double xtmp = x[i][0]; + const double ytmp = x[i][1]; + const double ztmp = x[i][2]; + double fxtmp,fytmp,fztmp; + fxtmp=fytmp=fztmp=0.0; + + const int * const jlist = firstneigh[i]; + const int jnum = numneigh[i]; + + for (int jj = 0; jj < jnum; jj++) { + double forcecoul, forcelj, evdwl, ecoul, fgamma, egamma; + forcecoul = forcelj = evdwl = ecoul = 0.0; + + const int sbindex = sbmask(jlist[jj]); + const int j = jlist[jj] & NEIGHMASK; + + const double delx = xtmp - x[j][0]; + const double dely = ytmp - x[j][1]; + const double delz = ztmp - x[j][2]; + const double rsq = delx*delx + dely*dely + delz*delz; + const int jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + const double r2inv = 1.0/rsq; + const int ljt = lj_type[itype][jtype]; + + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) { + const double r = sqrt(rsq); + const double prefactor = qqrd2e * qtmp*q[j]/r; + fgamma = 1.0 + (rsq/cut_coulsq)*force->kspace->dgamma(r/cut_coul); + forcecoul = prefactor * fgamma; + if (EFLAG) { + egamma = 1.0 - (r/cut_coul)*force->kspace->gamma(r/cut_coul); + ecoul = prefactor*egamma; + } + if (sbindex) { + const double adjust = (1.0-special_coul[sbindex])*prefactor; + forcecoul -= adjust; + if (EFLAG) ecoul -= adjust; + } + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits; + const double fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + const double table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (EFLAG) ecoul = qtmp*q[j] * (etable[itable] + fraction*detable[itable]); + if (sbindex) { + const double table2 = ctable[itable] + fraction*dctable[itable]; + const double prefactor = qtmp*q[j] * table2; + const double adjust = (1.0-special_coul[sbindex])*prefactor; + forcecoul -= adjust; + if (EFLAG) ecoul -= adjust; + } + } + } + + if (rsq < cut_ljsq[itype][jtype]) { + + if (ljt == LJ12_4) { + const double r4inv=r2inv*r2inv; + forcelj = r4inv*(lj1[itype][jtype]*r4inv*r4inv + - lj2[itype][jtype]); + + if (EFLAG) + evdwl = r4inv*(lj3[itype][jtype]*r4inv*r4inv + - lj4[itype][jtype]) - offset[itype][jtype]; + + } else if (ljt == LJ9_6) { + const double r3inv = r2inv*sqrt(r2inv); + const double r6inv = r3inv*r3inv; + forcelj = r6inv*(lj1[itype][jtype]*r3inv + - lj2[itype][jtype]); + if (EFLAG) + evdwl = r6inv*(lj3[itype][jtype]*r3inv + - lj4[itype][jtype]) - offset[itype][jtype]; + + } else if (ljt == LJ12_6) { + const double r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv*(lj1[itype][jtype]*r6inv + - lj2[itype][jtype]); + if (EFLAG) + evdwl = r6inv*(lj3[itype][jtype]*r6inv + - lj4[itype][jtype]) - offset[itype][jtype]; + } + + if (sbindex) { + const double factor_lj = special_lj[sbindex]; + forcelj *= factor_lj; + if (EFLAG) evdwl *= factor_lj; + } + + } + + const double fpair = (forcecoul + forcelj) * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJSDKCoulMSMOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJSDKCoulMSM::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_sdk_coul_msm_omp.h b/src/USER-OMP/pair_lj_sdk_coul_msm_omp.h new file mode 100644 index 0000000000..c85f7fb438 --- /dev/null +++ b/src/USER-OMP/pair_lj_sdk_coul_msm_omp.h @@ -0,0 +1,49 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/sdk/coul/msm/omp,PairLJSDKCoulMSMOMP) +PairStyle(cg/cmm/coul/msm/omp,PairLJSDKCoulMSMOMP) + +#else + +#ifndef LMP_PAIR_LJ_SDK_COUL_MSM_OMP_H +#define LMP_PAIR_LJ_SDK_COUL_MSM_OMP_H + +#include "pair_lj_sdk_coul_msm.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJSDKCoulMSMOMP : public PairLJSDKCoulMSM, public ThrOMP { + + public: + PairLJSDKCoulMSMOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> + void eval_msm_thr(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/thr_omp.cpp b/src/USER-OMP/thr_omp.cpp index fcac7367cf..9deb562a86 100644 --- a/src/USER-OMP/thr_omp.cpp +++ b/src/USER-OMP/thr_omp.cpp @@ -178,6 +178,13 @@ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag, thr->virial_fdotr_compute(x, nlocal, nghost, -1); else thr->virial_fdotr_compute(x, nlocal, nghost, nfirst); + } else { + if (style == fix->last_pair_hybrid) { + // pair_style hybrid will compute fdotr for us + // but we first need to reduce the forces + data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid); + need_force_reduce = 0; + } } } -- GitLab