diff --git a/src/USER-OMP/Install.sh b/src/USER-OMP/Install.sh index db0beb52181f60d1a5180aca29a97ea927ef437b..ec6fac4b92e2ed87c42320b8a63d102e19fe7fb5 100644 --- a/src/USER-OMP/Install.sh +++ b/src/USER-OMP/Install.sh @@ -1,10 +1,11 @@ # Install/unInstall package files in LAMMPS # do not install child files if parent does not exist -for file in *_omp.cpp *_omp.h; do +for file in *_omp.cpp *_omp.h pppm*proxy.h pppm*proxy.cpp; do # let us see if the "rain man" can count the toothpicks... - ofile=`echo $file | sed -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,` - + ofile=`echo $file | sed -e s,_pppm_tip4p_omp,_long_tip4p_omp, \ + -e s,pppm.\\*_proxy,pppm_omp, -e s,_pppm_omp,_long_omp, \ + -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,` if (test $1 = 1) then if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") then : # always install those files. @@ -18,3 +19,15 @@ for file in *_omp.cpp *_omp.h; do rm -f ../$file fi done + +if (test $1 = 1) then + + cp thr_data.h .. + cp thr_data.cpp .. + +elif (test $1 = 0) then + + rm -f ../thr_data.h + rm -f ../thr_data.cpp + +fi diff --git a/src/USER-OMP/Package.sh b/src/USER-OMP/Package.sh index 5a004c918709ca6dfc2bf7d98dc5e5699fd0a26e..6f577b2791653f29b646e3881821eccf89352954 100644 --- a/src/USER-OMP/Package.sh +++ b/src/USER-OMP/Package.sh @@ -1,22 +1,40 @@ # Update package files in LAMMPS -# cp package file to src if doesn't exist or is different -# do not copy certain files if non-OMP versions do not exist -# do remove OpenMP style files that have no matching -# non-OpenMP version installed, e.g. after a package has been removed - -for file in *_omp.cpp *_omp.h; do +# copy package file to src if it doesn't exists or is different +# do not copy OpenMP style files, if a non-OpenMP version does +# not exist. Do remove OpenMP style files that have no matching +# non-OpenMP version installed, e.g. 
after a package has been +# removed +for file in *_omp.cpp *_omp.h pppm*proxy.h pppm*proxy.cpp thr_data.h thr_data.cpp; do # let us see if the "rain man" can count the toothpicks... - ofile=`echo $file | sed -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,` - if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") then - : # always check for those files. + ofile=`echo $file | sed -e s,_pppm_tip4p_omp,_long_tip4p_omp, \ + -e s,pppm.\\*_proxy,pppm_omp, -e s,_pppm_omp,_long_omp, \ + -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,` + if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") \ + || (test $file = "thr_data.h") || (test $file = "thr_data.cpp") then + if (test ! -e ../$file) then + echo " creating src/$file" + cp $file .. + elif ! cmp -s $file ../$file ; then + echo " updating src/$file" + cp $file .. + fi elif (test ! -e ../$ofile) then if (test -e ../$file) then echo " removing src/$file" rm -f ../$file fi - continue + else + if (test ! -e ../$file) then + echo " creating src/$file" + cp $file .. + elif ! cmp -s $file ../$file ; then + echo " updating src/$file" + cp $file .. + fi fi +done +for file in thr_data.h thr_data.cpp; do if (test ! -e ../$file) then echo " creating src/$file" cp $file .. 
diff --git a/src/USER-OMP/dihedral_charmm_omp.cpp b/src/USER-OMP/dihedral_charmm_omp.cpp index 63bfc432700b40badc2fee8bb127c8be81415f95..b4d7e2e4adcb854f9f048455a49922eb0b75b5f6 100644 --- a/src/USER-OMP/dihedral_charmm_omp.cpp +++ b/src/USER-OMP/dihedral_charmm_omp.cpp @@ -40,7 +40,6 @@ void DihedralCharmmOMP::compute(int eflag, int vflag) if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = 0; // insure pair->ev_tally() will use 1-4 virial contribution @@ -53,43 +52,34 @@ void DihedralCharmmOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - - // reduce contributions to non-bonded energy terms - for (int n = 0; n < nthreads; ++n) { - force->pair->eng_vdwl += eng_vdwl_thr[n]; - force->pair->eng_coul += eng_coul_thr[n]; - } } template <int EVFLAG, int EFLAG, int NEWTON_BOND> -void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid) +void DihedralCharmmOMP::eval(int nfrom, int nto, ThrData * const thr) { int i1,i2,i3,i4,i,m,n,type; @@ -105,12 +95,13 @@ void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid) edihedral = 0.0; - double **x = atom->x; - double *q = atom->q; - int *atomtype = atom->type; - int **dihedrallist = neighbor->dihedrallist; - int nlocal = atom->nlocal; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const atomtype = atom->type; + const int * const * const dihedrallist = neighbor->dihedrallist; + const double qqrd2e = force->qqrd2e; + const int nlocal = atom->nlocal; for (n = nfrom; n < nto; n++) { i1 = dihedrallist[n][0]; @@ -176,7 +167,7 @@ void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid) if (screen) { char str[128]; sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", - me,tid,update->ntimestep, + me,thr->get_tid(),update->ntimestep, atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); error->warning(FLERR,str,0); fprintf(screen," 1st atom: %d %g %g %g\n", @@ -282,7 +273,7 @@ void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid) if (EVFLAG) ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, - vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid); + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); // 1-4 LJ and Coulomb interactions // tally energy/virial in pair, using newton_bond as newton flag @@ -321,7 +312,7 @@ void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid) } if (EVFLAG) 
ev_tally_thr(force->pair,i1,i4,nlocal,NEWTON_BOND, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } } diff --git a/src/USER-OMP/dihedral_charmm_omp.h b/src/USER-OMP/dihedral_charmm_omp.h index a39ad83f7e7e923e186890d42fbb0b556fb845bf..75ba6410d55d942af6a8435d9b4411699c1cb2a7 100644 --- a/src/USER-OMP/dihedral_charmm_omp.h +++ b/src/USER-OMP/dihedral_charmm_omp.h @@ -1,4 +1,4 @@ -/* ---------------------------------------------------------------------- +/* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -33,13 +33,13 @@ class DihedralCharmmOMP : public DihedralCharmm, public ThrOMP { public: DihedralCharmmOMP(class LAMMPS *lmp) : - DihedralCharmm(lmp), ThrOMP(lmp,DIHEDRAL) {}; + DihedralCharmm(lmp), ThrOMP(lmp,THR_DIHEDRAL|THR_CHARMM) {}; virtual void compute(int, int); private: template <int EVFLAG, int EFLAG, int NEWTON_BOND> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/dihedral_class2_omp.cpp b/src/USER-OMP/dihedral_class2_omp.cpp index 7348296644cf0310d855de46bc50bb5dae8f1769..07e0fba6e117fab9646f4be98f4b64d30461f502 100644 --- a/src/USER-OMP/dihedral_class2_omp.cpp +++ b/src/USER-OMP/dihedral_class2_omp.cpp @@ -39,7 +39,6 @@ void DihedralClass2OMP::compute(int eflag, int vflag) if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = 0; const int nall = atom->nlocal + atom->nghost; @@ -47,37 +46,34 @@ void DihedralClass2OMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + 
loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); } template <int EVFLAG, int EFLAG, int NEWTON_BOND> -void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid) +void DihedralClass2OMP::eval(int nfrom, int nto, ThrData * const thr) { int i1,i2,i3,i4,i,j,k,n,type; @@ -96,9 +92,10 @@ void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid) edihedral = 0.0; - double **x = atom->x; - int **dihedrallist = neighbor->dihedrallist; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; for (n = nfrom; n < nto; n++) { i1 = dihedrallist[n][0]; @@ -170,7 +167,7 @@ void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid) sc2 = sqrt(sin2); if (sc2 < SMALL) sc2 = SMALL; sc2 = 1.0/sc2; - + s1 = sc1 * sc1; s2 = sc2 * sc2; s12 = sc1 * sc2; @@ -179,12 +176,12 @@ void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid) // error check if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { - int me; - MPI_Comm_rank(world,&me); + int me = comm->me; + if (screen) { char str[128]; - sprintf(str,"Dihedral problem: %d " BIGINT_FORMAT " %d %d %d %d", - me,update->ntimestep, + sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); error->warning(FLERR,str,0); fprintf(screen," 1st atom: %d %g %g %g\n", @@ -526,7 +523,7 @@ void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid) if (EVFLAG) ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral, fabcd[0],fabcd[2],fabcd[3], - vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid); + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); } } diff --git a/src/USER-OMP/dihedral_class2_omp.h b/src/USER-OMP/dihedral_class2_omp.h index d26f2f87138140f64a70cc61408e4b10fa40605f..14a6c40edd39a022ff08c1bec26e9a8e1df65b48 100644 --- 
a/src/USER-OMP/dihedral_class2_omp.h +++ b/src/USER-OMP/dihedral_class2_omp.h @@ -33,13 +33,13 @@ class DihedralClass2OMP : public DihedralClass2, public ThrOMP { public: DihedralClass2OMP(class LAMMPS *lmp) : - DihedralClass2(lmp), ThrOMP(lmp,DIHEDRAL) {}; + DihedralClass2(lmp), ThrOMP(lmp,THR_DIHEDRAL) {}; virtual void compute(int, int); private: template <int EVFLAG, int EFLAG, int NEWTON_BOND> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp index a6c027e92d7f107317388453e86cb96ffc555087..1a80e8a7cd746b5055237c05ca8e85d2e16919e4 100644 --- a/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp +++ b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp @@ -39,7 +39,6 @@ void DihedralCosineShiftExpOMP::compute(int eflag, int vflag) if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = 0; const int nall = atom->nlocal + atom->nghost; @@ -47,37 +46,34 @@ void DihedralCosineShiftExpOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_bond) 
eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); } template <int EVFLAG, int EFLAG, int NEWTON_BOND> -void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid) +void DihedralCosineShiftExpOMP::eval(int nfrom, int nto, ThrData * const thr) { int i1,i2,i3,i4,n,type; @@ -91,9 +87,10 @@ void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid) edihedral = 0.0; - double **x = atom->x; - int **dihedrallist = neighbor->dihedrallist; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; for (n = nfrom; n < nto; n++) { i1 = dihedrallist[n][0]; @@ -159,7 +156,7 @@ void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid) if (screen) { char str[128]; sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", - me,tid,update->ntimestep, + me,thr->get_tid(),update->ntimestep, atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); error->warning(FLERR,str,0); fprintf(screen," 1st atom: %d %g %g %g\n", @@ -172,7 +169,7 @@ void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid) me,x[i4][0],x[i4][1],x[i4][2]); } } - + if (c > 1.0) c = 1.0; if (c < -1.0) c = -1.0; @@ -257,7 +254,7 @@ void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid) if (EVFLAG) ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, - vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid); + 
vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); } } diff --git a/src/USER-OMP/dihedral_cosine_shift_exp_omp.h b/src/USER-OMP/dihedral_cosine_shift_exp_omp.h index eb906ab9534b29051796f9a4017633a4a22900d7..54627c169b0a4016ec446bde9e1704102c8ad318 100644 --- a/src/USER-OMP/dihedral_cosine_shift_exp_omp.h +++ b/src/USER-OMP/dihedral_cosine_shift_exp_omp.h @@ -33,13 +33,13 @@ class DihedralCosineShiftExpOMP : public DihedralCosineShiftExp, public ThrOMP { public: DihedralCosineShiftExpOMP(class LAMMPS *lmp) : - DihedralCosineShiftExp(lmp), ThrOMP(lmp,DIHEDRAL) {}; + DihedralCosineShiftExp(lmp), ThrOMP(lmp,THR_DIHEDRAL) {}; virtual void compute(int, int); private: template <int EVFLAG, int EFLAG, int NEWTON_BOND> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/dihedral_harmonic_omp.cpp b/src/USER-OMP/dihedral_harmonic_omp.cpp index 0fa24090a74a4ce11cf9f9ba2b29c25cafe84958..cdad9b6ab89fa1a41cad51388c39e6f55f535d6d 100644 --- a/src/USER-OMP/dihedral_harmonic_omp.cpp +++ b/src/USER-OMP/dihedral_harmonic_omp.cpp @@ -39,7 +39,6 @@ void DihedralHarmonicOMP::compute(int eflag, int vflag) if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = 0; const int nall = atom->nlocal + atom->nghost; @@ -47,37 +46,34 @@ void DihedralHarmonicOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else 
eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); } template <int EVFLAG, int EFLAG, int NEWTON_BOND> -void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid) +void DihedralHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr) { int i1,i2,i3,i4,i,m,n,type; @@ -90,9 +86,10 @@ void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid) edihedral = 0.0; - double **x = atom->x; - int **dihedrallist = neighbor->dihedrallist; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; for (n = nfrom; n < nto; n++) { i1 = dihedrallist[n][0]; @@ -158,7 +155,7 @@ void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid) if (screen) { char str[128]; sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", - me,tid,update->ntimestep, + me,thr->get_tid(),update->ntimestep, atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); error->warning(FLERR,str,0); fprintf(screen," 1st atom: %d %g %g %g\n", @@ -264,7 +261,6 @@ void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid) if (EVFLAG) ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, - 
vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid); + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); } } - diff --git a/src/USER-OMP/dihedral_harmonic_omp.h b/src/USER-OMP/dihedral_harmonic_omp.h index 2d7bae64ee6e768423f6d67823be0c329a1b6ee7..8b8562ad90fad8592fa83a7e380b6dc0e5c5c3e3 100644 --- a/src/USER-OMP/dihedral_harmonic_omp.h +++ b/src/USER-OMP/dihedral_harmonic_omp.h @@ -1,4 +1,4 @@ -/* ---------------------------------------------------------------------- +/* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -33,13 +33,13 @@ class DihedralHarmonicOMP : public DihedralHarmonic, public ThrOMP { public: DihedralHarmonicOMP(class LAMMPS *lmp) : - DihedralHarmonic(lmp), ThrOMP(lmp,DIHEDRAL) {}; + DihedralHarmonic(lmp), ThrOMP(lmp,THR_DIHEDRAL) {}; virtual void compute(int, int); private: template <int EVFLAG, int EFLAG, int NEWTON_BOND> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/dihedral_helix_omp.cpp b/src/USER-OMP/dihedral_helix_omp.cpp index 4ec701a0cbe3e429a428aef6e96f2bfdb2888305..b9b61982f9f92aa0cec0d543e36e6fb4cbd268b5 100644 --- a/src/USER-OMP/dihedral_helix_omp.cpp +++ b/src/USER-OMP/dihedral_helix_omp.cpp @@ -42,7 +42,6 @@ void DihedralHelixOMP::compute(int eflag, int vflag) if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = 0; const int nall = atom->nlocal + atom->nghost; @@ -50,37 +49,34 @@ void DihedralHelixOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, 
inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); } template <int EVFLAG, int EFLAG, int NEWTON_BOND> -void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid) +void DihedralHelixOMP::eval(int nfrom, int nto, ThrData * const thr) { int i1,i2,i3,i4,n,type; @@ -94,9 +90,10 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid) edihedral = 0.0; - double **x = atom->x; - int **dihedrallist = neighbor->dihedrallist; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; for (n = nfrom; n < nto; n++) { i1 = dihedrallist[n][0]; @@ -132,18 +129,18 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid) domain->minimum_image(vb3x,vb3y,vb3z); // c0 calculation - + sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z); sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z); sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z); - + rb1 = 
sqrt(sb1); rb3 = sqrt(sb3); - + c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3; // 1st and 2nd angle - + b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z; b1mag = sqrt(b1mag2); b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z; @@ -181,15 +178,16 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid) cz = vb1x*vb2y - vb1y*vb2x; cmag = sqrt(cx*cx + cy*cy + cz*cz); dx = (cx*vb3x + cy*vb3y + cz*vb3z)/cmag/b3mag; - + // error check if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { int me = comm->me; + if (screen) { char str[128]; sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", - me,tid,update->ntimestep, + me,thr->get_tid(),update->ntimestep, atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); error->warning(FLERR,str,0); fprintf(screen," 1st atom: %d %g %g %g\n", @@ -202,7 +200,7 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid) me,x[i4][0],x[i4][1],x[i4][2]); } } - + if (c > 1.0) c = 1.0; if (c < -1.0) c = -1.0; @@ -217,7 +215,6 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid) if (EFLAG) edihedral = aphi[type]*(1.0 - c) + bphi[type]*(1.0 + cos(3.0*phi)) + cphi[type]*(1.0 + cos(phi + MY_PI4)); -; a = pd; c = c * a; @@ -277,6 +274,6 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid) if (EVFLAG) ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, - vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid); + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); } } diff --git a/src/USER-OMP/dihedral_helix_omp.h b/src/USER-OMP/dihedral_helix_omp.h index 792319741345b5ce9a2e30fa38e12519b69c7963..e932045cfff06717d8a0298206b2349ca880f5e0 100644 --- a/src/USER-OMP/dihedral_helix_omp.h +++ b/src/USER-OMP/dihedral_helix_omp.h @@ -33,13 +33,13 @@ class DihedralHelixOMP : public DihedralHelix, public ThrOMP { public: DihedralHelixOMP(class LAMMPS *lmp) : - DihedralHelix(lmp), ThrOMP(lmp,DIHEDRAL) {}; + DihedralHelix(lmp), ThrOMP(lmp,THR_DIHEDRAL) {}; virtual void 
compute(int, int); private: template <int EVFLAG, int EFLAG, int NEWTON_BOND> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/dihedral_multi_harmonic_omp.cpp b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp index bde958984e5e148635b31c78a0b23c5abf295a15..822ddb79650ab6c0f82f7c966417698bec10fadf 100644 --- a/src/USER-OMP/dihedral_multi_harmonic_omp.cpp +++ b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp @@ -39,7 +39,6 @@ void DihedralMultiHarmonicOMP::compute(int eflag, int vflag) if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = 0; const int nall = atom->nlocal + atom->nghost; @@ -47,37 +46,34 @@ void DihedralMultiHarmonicOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); } template <int EVFLAG, int EFLAG, int NEWTON_BOND> -void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid) +void DihedralMultiHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr) { int i1,i2,i3,i4,n,type; @@ -91,9 +87,10 @@ void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid) edihedral = 0.0; - double **x = atom->x; - int **dihedrallist = neighbor->dihedrallist; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; for (n = nfrom; n < nto; n++) { i1 = dihedrallist[n][0]; @@ -180,7 +177,7 @@ void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid) if (screen) { char str[128]; sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", - me,tid,update->ntimestep, + me,thr->get_tid(),update->ntimestep, atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); error->warning(FLERR,str,0); fprintf(screen," 1st atom: %d %g %g %g\n", @@ -264,6 +261,6 @@ void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid) if (EVFLAG) ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, - vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid); + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); } } diff --git a/src/USER-OMP/dihedral_multi_harmonic_omp.h b/src/USER-OMP/dihedral_multi_harmonic_omp.h index da2322f03834eb79c85eb93bda5b7eb6ff0a5c0e..628ad2a6a09dc2fc9df8e8e2274420ee060f16e1 100644 --- a/src/USER-OMP/dihedral_multi_harmonic_omp.h +++ b/src/USER-OMP/dihedral_multi_harmonic_omp.h @@ -33,13 +33,13 @@ class DihedralMultiHarmonicOMP : public DihedralMultiHarmonic, public ThrOMP { public: 
DihedralMultiHarmonicOMP(class LAMMPS *lmp) : - DihedralMultiHarmonic(lmp), ThrOMP(lmp,DIHEDRAL) {}; + DihedralMultiHarmonic(lmp), ThrOMP(lmp,THR_DIHEDRAL) {}; virtual void compute(int, int); private: template <int EVFLAG, int EFLAG, int NEWTON_BOND> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/dihedral_opls_omp.cpp b/src/USER-OMP/dihedral_opls_omp.cpp index 9f59e26d26dee23d96c3c781b35661eecb0085eb..6e46575f3b2d4f0ae00a552c8b7d1b51f9ba30c8 100644 --- a/src/USER-OMP/dihedral_opls_omp.cpp +++ b/src/USER-OMP/dihedral_opls_omp.cpp @@ -40,7 +40,6 @@ void DihedralOPLSOMP::compute(int eflag, int vflag) if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = 0; const int nall = atom->nlocal + atom->nghost; @@ -48,37 +47,34 @@ void DihedralOPLSOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); } template <int EVFLAG, int EFLAG, int NEWTON_BOND> -void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid) +void DihedralOPLSOMP::eval(int nfrom, int nto, ThrData * const thr) { int i1,i2,i3,i4,n,type; @@ -92,9 +88,10 @@ void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid) edihedral = 0.0; - double **x = atom->x; - int **dihedrallist = neighbor->dihedrallist; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; for (n = nfrom; n < nto; n++) { i1 = dihedrallist[n][0]; @@ -188,7 +185,7 @@ void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid) if (screen) { char str[128]; sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", - me,tid,update->ntimestep, + me,thr->get_tid(),update->ntimestep, atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); error->warning(FLERR,str,0); fprintf(screen," 1st atom: %d %g %g %g\n", @@ -201,7 +198,7 @@ void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid) me,x[i4][0],x[i4][1],x[i4][2]); } } - + if (c > 1.0) c = 1.0; if (c < -1.0) c = -1.0; @@ -280,7 +277,7 @@ void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid) if (EVFLAG) ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, - vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid); + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); } } diff --git a/src/USER-OMP/dihedral_opls_omp.h b/src/USER-OMP/dihedral_opls_omp.h index 58b9920538b6919ff6d93d9d0ec50e624f0661ab..44c76bb2adc2caf476bb03042ce56a49433fa879 100644 --- a/src/USER-OMP/dihedral_opls_omp.h +++ b/src/USER-OMP/dihedral_opls_omp.h @@ -33,13 +33,13 
@@ class DihedralOPLSOMP : public DihedralOPLS, public ThrOMP { public: DihedralOPLSOMP(class LAMMPS *lmp) : - DihedralOPLS(lmp), ThrOMP(lmp,DIHEDRAL) {}; + DihedralOPLS(lmp), ThrOMP(lmp,THR_DIHEDRAL) {}; virtual void compute(int, int); private: template <int EVFLAG, int EFLAG, int NEWTON_BOND> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/fix_nve_sphere_omp.cpp b/src/USER-OMP/fix_nve_sphere_omp.cpp index a642b21f222c409a6e4c04f23f70d3e438d9f39d..93af055f8111d9ae5505bb0e085e9b5dcaa5299c 100644 --- a/src/USER-OMP/fix_nve_sphere_omp.cpp +++ b/src/USER-OMP/fix_nve_sphere_omp.cpp @@ -34,26 +34,24 @@ enum{NONE,DIPOLE}; void FixNVESphereOMP::initial_integrate(int vflag) { - double **x = atom->x; - double **v = atom->v; - double **f = atom->f; - double **omega = atom->omega; - double **torque = atom->torque; - double *radius = atom->radius; - double *rmass = atom->rmass; - int *mask = atom->mask; - int nlocal = atom->nlocal; + double * const * const x = atom->x; + double * const * const v = atom->v; + const double * const * const f = atom->f; + double * const * const omega = atom->omega; + const double * const * const torque = atom->torque; + const double * const radius = atom->radius; + const double * const rmass = atom->rmass; + const int * const mask = atom->mask; + const int nlocal = (igroup == atom->firstgroup) ? 
atom->nfirst : atom->nlocal; int i; - if (igroup == atom->firstgroup) nlocal = atom->nfirst; - // set timestep here since dt may have changed or come via rRESPA const double dtfrotate = dtf / INERTIA; // update v,x,omega for all particles // d_omega/dt = torque / inertia #if defined(_OPENMP) -#pragma omp parallel for private(i) default(shared) +#pragma omp parallel for private(i) default(none) #endif for (i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { @@ -77,9 +75,9 @@ void FixNVESphereOMP::initial_integrate(int vflag) // renormalize mu to dipole length if (extra == DIPOLE) { - double **mu = atom->mu; + double * const * const mu = atom->mu; #if defined(_OPENMP) -#pragma omp parallel for private(i) default(shared) +#pragma omp parallel for private(i) default(none) #endif for (i = 0; i < nlocal; i++) { double g0,g1,g2,msq,scale; @@ -103,18 +101,16 @@ void FixNVESphereOMP::initial_integrate(int vflag) void FixNVESphereOMP::final_integrate() { - double **v = atom->v; - double **f = atom->f; - double **omega = atom->omega; - double **torque = atom->torque; - double *rmass = atom->rmass; - double *radius = atom->radius; - int *mask = atom->mask; - int nlocal = atom->nlocal; + double * const * const v = atom->v; + const double * const * const f = atom->f; + double * const * const omega = atom->omega; + const double * const * const torque = atom->torque; + const double * const rmass = atom->rmass; + const double * const radius = atom->radius; + const int * const mask = atom->mask; + const int nlocal = (igroup == atom->firstgroup) ? 
atom->nfirst : atom->nlocal; int i; - if (igroup == atom->firstgroup) nlocal = atom->nfirst; - // set timestep here since dt may have changed or come via rRESPA const double dtfrotate = dtf / INERTIA; @@ -123,7 +119,7 @@ void FixNVESphereOMP::final_integrate() // d_omega/dt = torque / inertia #if defined(_OPENMP) -#pragma omp parallel for private(i) default(shared) +#pragma omp parallel for private(i) default(none) #endif for (i = 0; i < nlocal; i++) if (mask[i] & groupbit) { diff --git a/src/USER-OMP/fix_shear_history_omp.cpp b/src/USER-OMP/fix_shear_history_omp.cpp index 40781cb4078812f36b28a64037596e187170b3fe..4655dd1af77a8f09ba188b8d26d992b96a475085 100644 --- a/src/USER-OMP/fix_shear_history_omp.cpp +++ b/src/USER-OMP/fix_shear_history_omp.cpp @@ -47,7 +47,7 @@ void FixShearHistoryOMP::pre_exchange() int flag = 0; #if defined(_OPENMP) -#pragma omp parallel shared(flag) +#pragma omp parallel default(none) shared(flag) #endif { diff --git a/src/USER-OMP/pair_adp_omp.cpp b/src/USER-OMP/pair_adp_omp.cpp index e91642e6ba61e2f9bb24a7ac8d9da7d86d3019a5..3af4a2f7cd5e0bf6dc0542afe53ab24148396d8e 100644 --- a/src/USER-OMP/pair_adp_omp.cpp +++ b/src/USER-OMP/pair_adp_omp.cpp @@ -28,7 +28,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairADPOMP::PairADPOMP(LAMMPS *lmp) : - PairADP(lmp), ThrOMP(lmp, PAIR) + PairADP(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -39,10 +39,10 @@ void PairADPOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; - const int nall = atom->nlocal + atom->nghost; + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; @@ -62,48 +62,39 @@ void PairADPOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) 
shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, *rho_t, **mu_t, **lambda_t; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - if (force->newton_pair) { - rho_t = rho + tid*nall; - mu_t = mu + tid*nall; - lambda_t = lambda + tid*nall; - } else { - rho_t = rho + tid*atom->nlocal; - mu_t = mu + tid*atom->nlocal; - lambda_t = lambda + tid*atom->nlocal; - } + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (force->newton_pair) + thr->init_adp(nall, rho, mu, lambda); + else + thr->init_adp(nlocal, rho, mu, lambda); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); - else eval<1,1,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); - else eval<1,0,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); - else eval<0,0,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairADPOMP::eval(double **f, double *rho_t, double **mu_t, - double **lambda_t, int iifrom, int iito, int tid) +void PairADPOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,m,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -117,7 +108,13 @@ void PairADPOMP::eval(double **f, double *rho_t, double **mu_t, evdwl = 0.0; - double **x = atom->x; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const rho_t = thr->get_rho(); + double * const * const mu_t = thr->get_mu(); + double * const * const lambda_t = thr->get_lambda(); + const int tid = thr->get_tid(); + int *type = atom->type; int nlocal = atom->nlocal; int nall = nlocal + atom->nghost; @@ -128,18 +125,6 @@ void PairADPOMP::eval(double **f, double *rho_t, double **mu_t, numneigh = list->numneigh; firstneigh = list->firstneigh; - // zero out density - - if (NEWTON_PAIR) { - memset(rho_t, 0, nall*sizeof(double)); - memset(&(mu_t[0][0]), 0, 3*nall*sizeof(double)); - memset(&(lambda_t[0][0]), 0, 6*nall*sizeof(double)); - } else { - memset(rho_t, 0, nlocal*sizeof(double)); - memset(&(mu_t[0][0]), 0, 3*nlocal*sizeof(double)); - memset(&(lambda_t[0][0]), 0, 6*nlocal*sizeof(double)); - } - // rho = density at each atom // loop over neighbors of my atoms @@ -259,8 +244,7 @@ void PairADPOMP::eval(double **f, double *rho_t, double **mu_t, lambda[i][4]+lambda[i][5]*lambda[i][5]); phi -= 1.0/6.0*(lambda[i][0]+lambda[i][1]+lambda[i][2])* (lambda[i][0]+lambda[i][1]+lambda[i][2]); - if (eflag_global) eng_vdwl_thr[tid] += phi; - if (eflag_atom) eatom_thr[tid][i] += phi; + e_tally_thr(this,i,i,nlocal,/* newton_pair */ 1, phi, 0.0, thr); } } @@ -384,7 +368,7 @@ void PairADPOMP::eval(double **f, double *rho_t, double **mu_t, if (EFLAG) evdwl = phi; if (EVFLAG) 
ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0, - fx,fy,fz,delx,dely,delz,tid); + fx,fy,fz,delx,dely,delz,thr); } } f[i][0] += fxtmp; @@ -399,6 +383,6 @@ double PairADPOMP::memory_usage() { double bytes = memory_usage_thr(); bytes += PairADP::memory_usage(); - + bytes += (comm->nthreads-1) * nmax * (10*sizeof(double) + 3*sizeof(double *)); return bytes; } diff --git a/src/USER-OMP/pair_adp_omp.h b/src/USER-OMP/pair_adp_omp.h index f7d2509cd3c01b7a735c6675460f330aeb502671..9a7f4023fb4ce674ab222609ef78ff67f3870e64 100644 --- a/src/USER-OMP/pair_adp_omp.h +++ b/src/USER-OMP/pair_adp_omp.h @@ -39,8 +39,7 @@ class PairADPOMP : public PairADP, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, double *rho_t, double **mu_t, double **lambda_t, - int iifrom, int iito, int tid); + void eval(int iifrom, int iito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_born_coul_long_omp.cpp b/src/USER-OMP/pair_born_coul_long_omp.cpp index c277a080c0974d764aee599a923e073e71d7bbfe..cf409f3cfc74d31d1b978d5215c158819b111382 100644 --- a/src/USER-OMP/pair_born_coul_long_omp.cpp +++ b/src/USER-OMP/pair_born_coul_long_omp.cpp @@ -33,7 +33,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairBornCoulLongOMP::PairBornCoulLongOMP(LAMMPS *lmp) : - PairBornCoulLong(lmp), ThrOMP(lmp, PAIR) + PairBornCoulLong(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -44,7 +44,6 @@ void PairBornCoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -52,40 +51,36 @@ void PairBornCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, 
ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairBornCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) +void PairBornCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -95,9 +90,10 @@ void PairBornCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; int nlocal = atom->nlocal; double *special_coul = force->special_coul; double *special_lj = force->special_lj; @@ -179,7 +175,7 @@ void PairBornCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) } else evdwl = 0.0; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_born_coul_long_omp.h b/src/USER-OMP/pair_born_coul_long_omp.h index d6ccbfc680b648d5989cfdc217c779b46722612e..3271c566a48361e5692aa17f6df5dab67b42f24b 100644 --- a/src/USER-OMP/pair_born_coul_long_omp.h +++ b/src/USER-OMP/pair_born_coul_long_omp.h @@ -39,7 +39,7 @@ class PairBornCoulLongOMP : public PairBornCoulLong, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_born_omp.cpp b/src/USER-OMP/pair_born_omp.cpp index c39d205c972fa834760ebdff2358d0ba963d611c..d9dbf0d29e34d8a2c5cf5904165e8a85d06bc6b6 100644 --- a/src/USER-OMP/pair_born_omp.cpp +++ b/src/USER-OMP/pair_born_omp.cpp @@ -25,7 +25,7 @@ using 
namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairBornOMP::PairBornOMP(LAMMPS *lmp) : - PairBorn(lmp), ThrOMP(lmp, PAIR) + PairBorn(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairBornOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairBornOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairBornOMP::eval(double **f, int iifrom, int iito, int tid) +void PairBornOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -84,7 +79,8 @@ void PairBornOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; @@ -143,7 +139,7 @@ void PairBornOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_born_omp.h b/src/USER-OMP/pair_born_omp.h index b24de4a57778f1ddaa3bb030acb12df6947372cb..726064472844e577465d5c35cff4994e3579f476 100644 --- a/src/USER-OMP/pair_born_omp.h +++ b/src/USER-OMP/pair_born_omp.h @@ -39,7 +39,7 @@ class PairBornOMP : public PairBorn, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_buck_coul_cut_omp.cpp b/src/USER-OMP/pair_buck_coul_cut_omp.cpp index ac47d478a03f692cba6fd61da3e702b44677105f..235f1c4f2cb104f19ad68d8b046cb970be63c136 100644 --- a/src/USER-OMP/pair_buck_coul_cut_omp.cpp +++ b/src/USER-OMP/pair_buck_coul_cut_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairBuckCoulCutOMP::PairBuckCoulCutOMP(LAMMPS *lmp) : - PairBuckCoulCut(lmp), ThrOMP(lmp, PAIR) + PairBuckCoulCut(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairBuckCoulCutOMP::compute(int 
eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,35 @@ void PairBuckCoulCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairBuckCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairBuckCoulCutOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -86,8 +80,9 @@ void PairBuckCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; double *special_coul = force->special_coul; @@ -162,7 +157,7 @@ void PairBuckCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) } else evdwl = 0.0; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_buck_coul_cut_omp.h b/src/USER-OMP/pair_buck_coul_cut_omp.h index a77f3bad24e13fdaef7564b0c2961d8db0cc083e..8fee0808c0007d2b10c91633d5a521f503f1cf9f 100644 --- a/src/USER-OMP/pair_buck_coul_cut_omp.h +++ b/src/USER-OMP/pair_buck_coul_cut_omp.h @@ -39,7 +39,7 @@ class PairBuckCoulCutOMP : public PairBuckCoulCut, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_buck_coul_long_omp.cpp b/src/USER-OMP/pair_buck_coul_long_omp.cpp index 6e7398ca449b3d8ab666313c30e221a34f5ecb88..083b9acc6ed3fc91f0265028179ea683accd65eb 100644 --- a/src/USER-OMP/pair_buck_coul_long_omp.cpp +++ b/src/USER-OMP/pair_buck_coul_long_omp.cpp @@ -33,7 +33,7 @@ using namespace LAMMPS_NS; /* 
---------------------------------------------------------------------- */ PairBuckCoulLongOMP::PairBuckCoulLongOMP(LAMMPS *lmp) : - PairBuckCoulLong(lmp), ThrOMP(lmp, PAIR) + PairBuckCoulLong(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -44,7 +44,6 @@ void PairBuckCoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -52,40 +51,37 @@ void PairBuckCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairBuckCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) +void PairBuckCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -95,8 +91,9 @@ void PairBuckCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; double *special_coul = force->special_coul; @@ -178,7 +175,7 @@ void PairBuckCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) } else evdwl = 0.0; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_buck_coul_long_omp.h b/src/USER-OMP/pair_buck_coul_long_omp.h index 2c87904de8eaee46f2689eb63b54f437630f4f98..a47e809eec074c709f5a4c28527f30a4aaf6ca9b 100644 --- a/src/USER-OMP/pair_buck_coul_long_omp.h +++ b/src/USER-OMP/pair_buck_coul_long_omp.h @@ -39,7 +39,7 @@ class PairBuckCoulLongOMP : public PairBuckCoulLong, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_buck_coul_omp.cpp b/src/USER-OMP/pair_buck_coul_omp.cpp index bd171f628a48f0ab8d9bb212a31d8081fd2dbef2..97299feeeb958eb6f35649a14fbf6b6a58b0cda1 100644 --- a/src/USER-OMP/pair_buck_coul_omp.cpp +++ b/src/USER-OMP/pair_buck_coul_omp.cpp @@ -34,7 +34,7 @@ using namespace LAMMPS_NS; /* 
---------------------------------------------------------------------- */ PairBuckCoulOMP::PairBuckCoulOMP(LAMMPS *lmp) : - PairBuckCoul(lmp), ThrOMP(lmp, PAIR) + PairBuckCoul(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -45,7 +45,6 @@ void PairBuckCoulOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -53,53 +52,50 @@ void PairBuckCoulOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairBuckCoulOMP::eval(double **f, int iifrom, int iito, int tid) +void PairBuckCoulOMP::eval(int iifrom, int iito, ThrData * const thr) { double evdwl,ecoul,fpair; evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; double *special_coul = force->special_coul; double *special_lj = force->special_lj; double qqrd2e = force->qqrd2e; - double *x0 = x[0]; + const double *x0 = x[0]; double *f0 = f[0], *fi = f0; int *ilist = list->ilist; @@ -129,7 +125,7 @@ void PairBuckCoulOMP::eval(double **f, int iifrom, int iito, int tid) ni = sbmask(j); j &= NEIGHMASK; - { register double *xj = x0+(j+(j<<1)); + { const register double *xj = x0+(j+(j<<1)); d[0] = xi[0] - xj[0]; // pair vector d[1] = xi[1] - xj[1]; d[2] = xi[2] - xj[2]; } @@ -214,7 +210,7 @@ void PairBuckCoulOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,d[0],d[1],d[2],tid); + evdwl,ecoul,fpair,d[0],d[1],d[2],thr); } } } diff --git a/src/USER-OMP/pair_buck_coul_omp.h b/src/USER-OMP/pair_buck_coul_omp.h index dbff9b419a7ca0ca5bb8bf3cc340d2d921c7648c..823f64a4aba1a79425d3173b71989264d65f6c05 100644 --- a/src/USER-OMP/pair_buck_coul_omp.h +++ b/src/USER-OMP/pair_buck_coul_omp.h @@ -39,7 +39,7 @@ class PairBuckCoulOMP : public PairBuckCoul, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_buck_omp.cpp b/src/USER-OMP/pair_buck_omp.cpp index 
66d8730abd72bef72da6e3092400382bb9432a55..5806a3e796884ffcf0fc92112be8eae7fd92d09f 100644 --- a/src/USER-OMP/pair_buck_omp.cpp +++ b/src/USER-OMP/pair_buck_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairBuckOMP::PairBuckOMP(LAMMPS *lmp) : - PairBuck(lmp), ThrOMP(lmp, PAIR) + PairBuck(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairBuckOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairBuckOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairBuckOMP::eval(double **f, int iifrom, int iito, int tid) +void PairBuckOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -84,7 +79,8 @@ void PairBuckOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; @@ -145,7 +141,7 @@ void PairBuckOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_buck_omp.h b/src/USER-OMP/pair_buck_omp.h index 40b6702e6f0da006d1ccbdd2a1eb1e14550a4457..c73e3f0d08c81d498712cbe3964b7f46bd2d7fed 100644 --- a/src/USER-OMP/pair_buck_omp.h +++ b/src/USER-OMP/pair_buck_omp.h @@ -39,7 +39,7 @@ class PairBuckOMP : public PairBuck, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_cdeam_omp.cpp b/src/USER-OMP/pair_cdeam_omp.cpp index 01bd5f6eaa36b4eaa93b935236acaf4a431c5564..287b39ceb1f67fa214197b738e5486c0f152a2aa 100644 --- a/src/USER-OMP/pair_cdeam_omp.cpp +++ b/src/USER-OMP/pair_cdeam_omp.cpp @@ -44,7 +44,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairCDEAMOMP::PairCDEAMOMP(LAMMPS *lmp, int _cdeamVersion) : - PairCDEAM(lmp,_cdeamVersion), PairEAM(lmp), ThrOMP(lmp, PAIR) + PairEAM(lmp), PairCDEAM(lmp,_cdeamVersion), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -55,7 +55,6 @@ void 
PairCDEAMOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; const int nall = atom->nlocal + atom->nghost; @@ -78,22 +77,19 @@ void PairCDEAMOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, *rho_t, *rhoB_t, *D_values_t; - - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - if (force->newton_pair) { - rho_t = rho + tid*nall; - rhoB_t = rhoB + tid*nall; - D_values_t = D_values + tid*nall; - } else { - rho_t = rho + tid*atom->nlocal; - rhoB_t = rhoB + tid*atom->nlocal; - D_values_t = D_values + tid*atom->nlocal; - } + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (force->newton_pair) + thr->init_cdeam(nall, rho, rhoB, D_values); + else + thr->init_cdeam(atom->nlocal, rho, rhoB, D_values); switch (cdeamVersion) { @@ -101,15 +97,15 @@ void PairCDEAMOMP::compute(int eflag, int vflag) if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); - else eval<1,1,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1,1>(ifrom, ito, thr); + else eval<1,1,0,1>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); - else eval<1,0,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1,1>(ifrom, ito, thr); + else eval<1,0,0,1>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); - else eval<0,0,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1,1>(ifrom, ito, thr); + else eval<0,0,0,1>(ifrom, ito, thr); } break; @@ 
-117,15 +113,15 @@ void PairCDEAMOMP::compute(int eflag, int vflag) if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); - else eval<1,1,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1,2>(ifrom, ito, thr); + else eval<1,1,0,2>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); - else eval<1,0,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1,2>(ifrom, ito, thr); + else eval<1,0,0,2>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); - else eval<0,0,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1,2>(ifrom, ito, thr); + else eval<0,0,0,2>(ifrom, ito, thr); } break; @@ -136,18 +132,12 @@ void PairCDEAMOMP::compute(int eflag, int vflag) error->all(FLERR,"unsupported eam/cd pair style variant"); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR, int CDEAMVERSION> -void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, - double *D_values_t, int iifrom, int iito, int tid) +void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -156,10 +146,17 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - int nall = nlocal + atom->nghost; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const rho_t = thr->get_rho(); + double * const rhoB_t = thr->get_rhoB(); + double * const D_values_t = thr->get_D_values(); + const int tid = thr->get_tid(); + const int nthreads = comm->nthreads; + + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; double fxtmp,fytmp,fztmp; @@ -167,18 +164,6 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, numneigh = list->numneigh; firstneigh = list->firstneigh; - // zero out density - - if (NEWTON_PAIR) { - memset(rho_t, 0, nall*sizeof(double)); - memset(rhoB_t, 0, nall*sizeof(double)); - memset(D_values_t, 0, nall*sizeof(double)); - } else { - memset(rho_t, 0, nlocal*sizeof(double)); - memset(rhoB_t, 0, nlocal*sizeof(double)); - memset(D_values_t, 0, nlocal*sizeof(double)); - } - // Stage I // Compute rho and rhoB at each local atom site. 
@@ -240,10 +225,10 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, if (NEWTON_PAIR) { // reduce per thread density - data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid); - data_reduce_thr(&(rhoB[0]), nall, comm->nthreads, 1, tid); + data_reduce_thr(rho, nall, nthreads, 1, tid); + data_reduce_thr(rhoB, nall, nthreads, 1, tid); if (CDEAMVERSION==1) - data_reduce_thr(&(D_values[0]), nall, comm->nthreads, 1, tid); + data_reduce_thr(D_values, nall, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -259,10 +244,10 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, } else { // reduce per thread density - data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid); - data_reduce_thr(&(rhoB[0]), nlocal, comm->nthreads, 1, tid); + data_reduce_thr(rho, nlocal, nthreads, 1, tid); + data_reduce_thr(rhoB, nlocal, nthreads, 1, tid); if (CDEAMVERSION==1) - data_reduce_thr(&(D_values[0]), nlocal, comm->nthreads, 1, tid); + data_reduce_thr(D_values, nlocal, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -277,8 +262,7 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, fp[i] = FPrimeOfRho(index, type[i]); if(EFLAG) { phi = FofRho(index, type[i]); - if (eflag_global) eng_vdwl_thr[tid] += phi; - if (eflag_atom) eatom_thr[tid][i] += phi; + e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr); } } @@ -360,7 +344,7 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, } if (NEWTON_PAIR) { - data_reduce_thr(&(D_values[0]), nall, comm->nthreads, 1, tid); + data_reduce_thr(D_values, nall, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -375,7 +359,7 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, sync_threads(); } else { - data_reduce_thr(&(D_values[0]), nlocal, comm->nthreads, 1, tid); + data_reduce_thr(D_values, nlocal, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -525,7 +509,7 
@@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, if(EFLAG) evdwl = phi; if(EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0, - fpair,delx,dely,delz,tid); + fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_cdeam_omp.h b/src/USER-OMP/pair_cdeam_omp.h index 85b124cb171ba58401efdd5b55f658432ef213a7..46f460f8fa07144c4235bd7fde547dc99fbe7575 100644 --- a/src/USER-OMP/pair_cdeam_omp.h +++ b/src/USER-OMP/pair_cdeam_omp.h @@ -40,8 +40,7 @@ class PairCDEAMOMP : public PairCDEAM, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR, int CDEAMVERSION> - void eval(double **f, double *rho_t, double *rhoB_t, double *D_values_t, - int iifrom, int iito, int tid); + void eval(int iifrom, int iito, ThrData * const thr); }; /// The one-site concentration formulation of CD-EAM. diff --git a/src/USER-OMP/pair_colloid_omp.cpp b/src/USER-OMP/pair_colloid_omp.cpp index c8bc74407a1bbdd0cbc5a427123376373094ec76..7bfe1c04dea65098eae8fed7ae6458a819badfad 100644 --- a/src/USER-OMP/pair_colloid_omp.cpp +++ b/src/USER-OMP/pair_colloid_omp.cpp @@ -26,7 +26,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairColloidOMP::PairColloidOMP(LAMMPS *lmp) : - PairColloid(lmp), ThrOMP(lmp, PAIR) + PairColloid(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -37,7 +37,6 @@ void PairColloidOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -45,38 +44,34 @@ void PairColloidOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = 
fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairColloidOMP::eval(double **f, int iifrom, int iito, int tid) +void PairColloidOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -86,10 +81,11 @@ void PairColloidOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -204,7 +200,7 @@ void PairColloidOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + 
evdwl,0.0,fpair,delx,dely,delz,thr); } f[i][0] += fxtmp; f[i][1] += fytmp; diff --git a/src/USER-OMP/pair_colloid_omp.h b/src/USER-OMP/pair_colloid_omp.h index a0be13cbb41fd4a63e0cf9b1a6d8d80944701d43..cde7e9b6502686ca84f7a4d26a985466341e9656 100644 --- a/src/USER-OMP/pair_colloid_omp.h +++ b/src/USER-OMP/pair_colloid_omp.h @@ -39,7 +39,7 @@ class PairColloidOMP : public PairColloid, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_coul_cut_omp.cpp b/src/USER-OMP/pair_coul_cut_omp.cpp index bb19db3d22e9f91f5e3981b052908af6a1f9c4ea..a8473eec38f7ee2ce4c39b2bfa519d8285758924 100644 --- a/src/USER-OMP/pair_coul_cut_omp.cpp +++ b/src/USER-OMP/pair_coul_cut_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairCoulCutOMP::PairCoulCutOMP(LAMMPS *lmp) : - PairCoulCut(lmp), ThrOMP(lmp, PAIR) + PairCoulCut(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairCoulCutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,36 @@ void PairCoulCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else 
eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairCoulCutOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; @@ -86,12 +81,13 @@ void PairCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -142,7 +138,7 @@ void PairCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) ecoul = factor_coul * qqrd2e * scale[itype][jtype] * qtmp*q[j]*rinv; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - 0.0,ecoul,fpair,delx,dely,delz,tid); + 
0.0,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_coul_cut_omp.h b/src/USER-OMP/pair_coul_cut_omp.h index eca9958ff23652ba7d36d76d9bb158ab14cc5258..3499ee4ae639bbe8a10c8e5c2659fb07478a63b1 100644 --- a/src/USER-OMP/pair_coul_cut_omp.h +++ b/src/USER-OMP/pair_coul_cut_omp.h @@ -39,7 +39,7 @@ class PairCoulCutOMP : public PairCoulCut, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_coul_debye_omp.cpp b/src/USER-OMP/pair_coul_debye_omp.cpp index 1c2e7b8e07deb62cfd30191abddd102518d2f51e..73e579262e2fa86d86e7c6a56ae263eadb391ba7 100644 --- a/src/USER-OMP/pair_coul_debye_omp.cpp +++ b/src/USER-OMP/pair_coul_debye_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairCoulDebyeOMP::PairCoulDebyeOMP(LAMMPS *lmp) : - PairCoulDebye(lmp), ThrOMP(lmp, PAIR) + PairCoulDebye(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairCoulDebyeOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,36 @@ void PairCoulDebyeOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else 
eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid) +void PairCoulDebyeOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; @@ -86,12 +81,13 @@ void PairCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid) ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -144,7 +140,7 @@ void PairCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid) ecoul = factor_coul * qqrd2e * qtmp*q[j] * rinv * screening; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - 0.0,ecoul,fpair,delx,dely,delz,tid); + 
0.0,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_coul_debye_omp.h b/src/USER-OMP/pair_coul_debye_omp.h index 7ad599bb1b70b978e24502bb148f8338d7e740b1..f016de8b5d8e8a7e29de1ddb8a6f63f8e92decf1 100644 --- a/src/USER-OMP/pair_coul_debye_omp.h +++ b/src/USER-OMP/pair_coul_debye_omp.h @@ -39,7 +39,7 @@ class PairCoulDebyeOMP : public PairCoulDebye, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_coul_long_omp.cpp b/src/USER-OMP/pair_coul_long_omp.cpp index 3a2e05159128af8c84a3cd3e40e8351a4a18f23c..82f070d37dffd357c4cbdd933487e87abe4590ae 100644 --- a/src/USER-OMP/pair_coul_long_omp.cpp +++ b/src/USER-OMP/pair_coul_long_omp.cpp @@ -33,7 +33,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairCoulLongOMP::PairCoulLongOMP(LAMMPS *lmp) : - PairCoulLong(lmp), ThrOMP(lmp, PAIR) + PairCoulLong(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -44,7 +44,6 @@ void PairCoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -52,40 +51,36 @@ void PairCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else 
eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) +void PairCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itable,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; @@ -96,12 +91,13 @@ void PairCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -181,7 +177,7 @@ void PairCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - 0.0,ecoul,fpair,delx,dely,delz,tid); + 0.0,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; 
diff --git a/src/USER-OMP/pair_coul_long_omp.h b/src/USER-OMP/pair_coul_long_omp.h index 7b63f762f23e0c11525d9eaa66dfbe24bf9b4ad2..d7655637d0f84cffd7880521b890741777a66c93 100644 --- a/src/USER-OMP/pair_coul_long_omp.h +++ b/src/USER-OMP/pair_coul_long_omp.h @@ -39,7 +39,7 @@ class PairCoulLongOMP : public PairCoulLong, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_dipole_cut_omp.cpp b/src/USER-OMP/pair_dipole_cut_omp.cpp index 9ba93b19b56192b2f90d23fa31738f2174f1eba1..85079dd718a69dbf82d5d860c1285bf752b0ebe2 100644 --- a/src/USER-OMP/pair_dipole_cut_omp.cpp +++ b/src/USER-OMP/pair_dipole_cut_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairDipoleCutOMP::PairDipoleCutOMP(LAMMPS *lmp) : - PairDipoleCut(lmp), ThrOMP(lmp, PAIR) + PairDipoleCut(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairDipoleCutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,34 @@ void PairDipoleCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, **torque; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - torque = atom->torque + tid*nall; + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid); - else eval<1,1,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + 
else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid); - else eval<1,0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid); - else eval<0,0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces and torques into global arrays. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); - data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairDipoleCutOMP::eval(double **f, double **torque, int iifrom, int iito, int tid) +void PairDipoleCutOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul; @@ -90,14 +83,16 @@ void PairDipoleCutOMP::eval(double **f, double **torque, int iifrom, int iito, i evdwl = 0.0; - double **x = atom->x; - double *q = atom->q; - double **mu = atom->mu; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const double * const q = atom->q; + const double * const * const mu = atom->mu; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double 
fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp; ilist = list->ilist; @@ -265,7 +260,7 @@ void PairDipoleCutOMP::eval(double **f, double **torque, int iifrom, int iito, i } if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fx,fy,fz,delx,dely,delz,tid); + evdwl,ecoul,fx,fy,fz,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_dipole_cut_omp.h b/src/USER-OMP/pair_dipole_cut_omp.h index 832bd4d3bec4f773b6d27595aa8a066865becd72..b175450c9f14d8692590a6b5fc7748259d55a856 100644 --- a/src/USER-OMP/pair_dipole_cut_omp.h +++ b/src/USER-OMP/pair_dipole_cut_omp.h @@ -39,7 +39,7 @@ class PairDipoleCutOMP : public PairDipoleCut, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, double **torque, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_dipole_sf_omp.cpp b/src/USER-OMP/pair_dipole_sf_omp.cpp index 9ebc72d41437fdec19016ea0448585d4c2b2d090..b920ff5c83f7367b7db5176a1df0221b5ec9c551 100644 --- a/src/USER-OMP/pair_dipole_sf_omp.cpp +++ b/src/USER-OMP/pair_dipole_sf_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairDipoleSFOMP::PairDipoleSFOMP(LAMMPS *lmp) : - PairDipoleSF(lmp), ThrOMP(lmp, PAIR) + PairDipoleSF(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairDipoleSFOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,34 @@ void PairDipoleSFOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, **torque; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - torque = atom->torque + tid*nall; + 
loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid); - else eval<1,1,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid); - else eval<1,0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid); - else eval<0,0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces and torques into global arrays. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); - data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairDipoleSFOMP::eval(double **f, double **torque, int iifrom, int iito, int tid) +void PairDipoleSFOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul; @@ -94,14 +87,16 @@ void PairDipoleSFOMP::eval(double **f, double **torque, int iifrom, int iito, in evdwl = 0.0; - double **x = atom->x; - double *q = atom->q; - double **mu = atom->mu; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const double * const q = atom->q; + const double * const * const mu = atom->mu; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp; ilist = list->ilist; @@ -297,7 +292,7 @@ void PairDipoleSFOMP::eval(double **f, double **torque, int iifrom, int iito, in } if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fx,fy,fz,delx,dely,delz,tid); + evdwl,ecoul,fx,fy,fz,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_dipole_sf_omp.h b/src/USER-OMP/pair_dipole_sf_omp.h index e601e2d569dc4a1b3df8bbbb6bedaadf4ddcfa8a..89c80fa78809cd1708861dfd538a89d38d23a944 100644 --- a/src/USER-OMP/pair_dipole_sf_omp.h +++ b/src/USER-OMP/pair_dipole_sf_omp.h @@ -39,7 +39,7 @@ class PairDipoleSFOMP : public PairDipoleSF, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, double **torque, int ifrom, int ito, int tid); + 
void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_dpd_omp.cpp b/src/USER-OMP/pair_dpd_omp.cpp index be1e32f37d96e9c15c1ddd12e87b5c5618707b81..0d24ce401dda50c8a0a44d6432c1d763ea2e4b0f 100644 --- a/src/USER-OMP/pair_dpd_omp.cpp +++ b/src/USER-OMP/pair_dpd_omp.cpp @@ -29,7 +29,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairDPDOMP::PairDPDOMP(LAMMPS *lmp) : - PairDPD(lmp), ThrOMP(lmp, PAIR) + PairDPD(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; random_thr = NULL; @@ -54,7 +54,6 @@ void PairDPDOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -63,46 +62,46 @@ void PairDPDOMP::compute(int eflag, int vflag) if (!random_thr) random_thr = new RanMars*[nthreads]; - + + // to ensure full compatibility with the serial DPD style + // we use its random number generator instance for thread 0 random_thr[0] = random; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + // generate a random number generator instance for + // all threads != 0. make sure we use unique seeds. 
if (random_thr && tid > 0) random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me + comm->nprocs*tid); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairDPDOMP::eval(double **f, int iifrom, int iito, int tid) +void PairDPDOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -112,14 +111,15 @@ void PairDPDOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - double **v = atom->v; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; - double dtinvsqrt = 1.0/sqrt(update->dt); + const double * const * const x = atom->x; + const double * const * const v = atom->v; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double *special_lj = force->special_lj; + const double dtinvsqrt = 1.0/sqrt(update->dt); double fxtmp,fytmp,fztmp; - RanMars &rng = *random_thr[tid]; + RanMars &rng = 
*random_thr[thr->get_tid()]; ilist = list->ilist; numneigh = list->numneigh; @@ -190,7 +190,7 @@ void PairDPDOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_dpd_omp.h b/src/USER-OMP/pair_dpd_omp.h index 9385e5444f6545a68fe9dc85c5f4bb8bc0ec8122..c3802f8e6000e4607617c374036027f0fd5933e9 100644 --- a/src/USER-OMP/pair_dpd_omp.h +++ b/src/USER-OMP/pair_dpd_omp.h @@ -43,7 +43,7 @@ class PairDPDOMP : public PairDPD, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_dpd_tstat_omp.cpp b/src/USER-OMP/pair_dpd_tstat_omp.cpp index 7e3fb8b3987cb491c2b3a6ee1d134a54aaf9587f..50a1bf439ebb6efba3bc6d1345a97ff4fd3e9aa8 100644 --- a/src/USER-OMP/pair_dpd_tstat_omp.cpp +++ b/src/USER-OMP/pair_dpd_tstat_omp.cpp @@ -29,7 +29,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairDPDTstatOMP::PairDPDTstatOMP(LAMMPS *lmp) : - PairDPDTstat(lmp), ThrOMP(lmp, PAIR) + PairDPDTstat(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; random_thr = NULL; @@ -54,7 +54,6 @@ void PairDPDTstatOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -64,45 +63,45 @@ void PairDPDTstatOMP::compute(int eflag, int vflag) if (!random_thr) random_thr = new RanMars*[nthreads]; + // to ensure full compatibility with the serial DPD style + // we use its random number generator instance for thread 0 random_thr[0] = random; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double 
**f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + // generate a random number generator instance for + // all threads != 0. make sure we use unique seeds. if (random_thr && tid > 0) random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me + comm->nprocs*tid); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairDPDTstatOMP::eval(double **f, int iifrom, int iito, int tid) +void PairDPDTstatOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -112,14 +111,15 @@ void PairDPDTstatOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - double **v = atom->v; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; - double dtinvsqrt = 1.0/sqrt(update->dt); + const double * const * const x = atom->x; + const double * const * const v = atom->v; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double *special_lj = force->special_lj; + const double dtinvsqrt = 1.0/sqrt(update->dt); double fxtmp,fytmp,fztmp; - RanMars &rng = *random_thr[tid]; + RanMars &rng = *random_thr[thr->get_tid()]; // adjust sigma if target T is changing @@ -192,7 +192,7 @@ void PairDPDTstatOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - 0.0,0.0,fpair,delx,dely,delz,tid); + 0.0,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_dpd_tstat_omp.h b/src/USER-OMP/pair_dpd_tstat_omp.h index 14f640a9259f2a96b5c77960e69e972399de61d6..87c9de5505db70fa491e1e9a178614634f7bb319 100644 --- a/src/USER-OMP/pair_dpd_tstat_omp.h +++ b/src/USER-OMP/pair_dpd_tstat_omp.h @@ -43,7 +43,7 @@ class PairDPDTstatOMP : public PairDPDTstat, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_eam_omp.cpp b/src/USER-OMP/pair_eam_omp.cpp index 
0ae4d54fb7e0369f7840c6abf530640e825d2220..c014eb75e29dd2e3d5956b4bc59023823d7703a9 100644 --- a/src/USER-OMP/pair_eam_omp.cpp +++ b/src/USER-OMP/pair_eam_omp.cpp @@ -28,7 +28,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairEAMOMP::PairEAMOMP(LAMMPS *lmp) : - PairEAM(lmp), ThrOMP(lmp, PAIR) + PairEAM(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -39,7 +39,6 @@ void PairEAMOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; const int nall = atom->nlocal + atom->nghost; @@ -58,42 +57,39 @@ void PairEAMOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, *rho_t; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + if (force->newton_pair) - rho_t = rho + tid*nall; - else rho_t = rho + tid*atom->nlocal; + thr->init_eam(nall, rho); + else + thr->init_eam(atom->nlocal, rho); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, rho_t, ifrom, ito, tid); - else eval<1,1,0>(f, rho_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, rho_t, ifrom, ito, tid); - else eval<1,0,0>(f, rho_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, rho_t, ifrom, ito, tid); - else eval<0,0,0>(f, rho_t, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairEAMOMP::eval(double **f, double *rho_t, - int iifrom, int iito, int tid) +void PairEAMOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,m,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -103,10 +99,15 @@ void PairEAMOMP::eval(double **f, double *rho_t, evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - int nall = nlocal + atom->nghost; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const rho_t = thr->get_rho(); + const int tid = thr->get_tid(); + const int nthreads = comm->nthreads; + + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; double fxtmp,fytmp,fztmp; @@ -114,11 +115,6 @@ void PairEAMOMP::eval(double **f, double *rho_t, numneigh = list->numneigh; firstneigh = list->firstneigh; - // zero out density - - if (NEWTON_PAIR) memset(rho_t, 0, nall*sizeof(double)); - else memset(rho_t, 0, nlocal*sizeof(double)); - // rho = density at each atom // loop over neighbors of my atoms @@ -164,7 +160,7 @@ void PairEAMOMP::eval(double **f, double *rho_t, if (NEWTON_PAIR) { // reduce per thread density - data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid); + data_reduce_thr(rho, nall, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -178,7 +174,7 @@ void PairEAMOMP::eval(double **f, double *rho_t, sync_threads(); } else { - data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid); + data_reduce_thr(rho, nlocal, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -198,8 +194,7 @@ void 
PairEAMOMP::eval(double **f, double *rho_t, fp[i] = (coeff[0]*p + coeff[1])*p + coeff[2]; if (EFLAG) { phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; - if (eflag_global) eng_vdwl_thr[tid] += phi; - if (eflag_atom) eatom_thr[tid][i] += phi; + e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr); } } @@ -283,7 +278,7 @@ void PairEAMOMP::eval(double **f, double *rho_t, if (EFLAG) evdwl = phi; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_eam_omp.h b/src/USER-OMP/pair_eam_omp.h index 1184cb34bcc4236a7509dcca17ede5cd8a70357e..6b0f1274fcd9419934a7d43d35832581d0e9d40f 100644 --- a/src/USER-OMP/pair_eam_omp.h +++ b/src/USER-OMP/pair_eam_omp.h @@ -39,7 +39,7 @@ class PairEAMOMP : public PairEAM, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, double *rho_t, int iifrom, int iito, int tid); + void eval(int iifrom, int iito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_edip_omp.cpp b/src/USER-OMP/pair_edip_omp.cpp index 65b05c8143b2f92c59fc248b2940298a340a2590..f0d6d47cecce8eb009cf75a9c0e0e1dee2aea270 100644 --- a/src/USER-OMP/pair_edip_omp.cpp +++ b/src/USER-OMP/pair_edip_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairEDIPOMP::PairEDIPOMP(LAMMPS *lmp) : - PairEDIP(lmp), ThrOMP(lmp, PAIR) + PairEDIP(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairEDIPOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = vflag_atom = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,35 +43,31 @@ void PairEDIPOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) 
shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (vflag_atom) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (vflag_atom) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (vflag_atom) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (vflag_atom) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } - } else eval<0,0,0>(f, ifrom, ito, tid); + } else eval<0,0,0>(ifrom, ito, thr); - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int VFLAG_ATOM> -void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid) +void PairEDIPOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,k,ii,inum,jnum; int itype,jtype,ktype,ijparam,ikparam,ijkparam; @@ -133,6 +128,8 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid) double potentia3B_factor; double potential2B_factor; + const int tid = thr->get_tid(); + double *pre_thrInvR_ij = preInvR_ij + tid * leadDimInteractionList; double *pre_thrExp3B_ij = preExp3B_ij + tid * leadDimInteractionList; double *pre_thrExp3BDerived_ij = preExp3BDerived_ij + tid * leadDimInteractionList; @@ -141,9 +138,10 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid) double *pre_thrPow2B_ij = prePow2B_ij + tid * leadDimInteractionList; double *pre_thrForceCoord = preForceCoord + tid * leadDimInteractionList; - double **x = atom->x; - int *type = 
atom->type; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; inum = list->inum; ilist = list->ilist; @@ -340,7 +338,7 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = (exp2B_ij * potential2B_factor); if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, evdwl, 0.0, - -forceMod2B*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2],tid); + -forceMod2B*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2],thr); // three-body Forces @@ -435,7 +433,7 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = (exp3B_ij * exp3B_ik * potentia3B_factor); - if (evflag) ev_tally3(i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik); + if (evflag) ev_tally3_thr(this,i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik,thr); } } @@ -469,7 +467,7 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, 0.0, 0.0, - forceModCoord_ij, dr_ij[0], dr_ij[1], dr_ij[2],tid); + forceModCoord_ij, dr_ij[0], dr_ij[1], dr_ij[2],thr); } } } diff --git a/src/USER-OMP/pair_edip_omp.h b/src/USER-OMP/pair_edip_omp.h index 55c34db345ae7406db63d8a1cf0d920ee03d1a8c..55e10c83bb58ee1d3263f821159ea3bf356f112e 100644 --- a/src/USER-OMP/pair_edip_omp.h +++ b/src/USER-OMP/pair_edip_omp.h @@ -34,7 +34,7 @@ class PairEDIPOMP : public PairEDIP, public ThrOMP { private: template <int EVFLAG, int EFLAG, int VFLAG_ATOM> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_eim_omp.cpp b/src/USER-OMP/pair_eim_omp.cpp index d31ad201207e359faa6afb2c83c9e840580b9c64..7184adb7813269953f1e63f21e7717b4a9fee791 100644 --- a/src/USER-OMP/pair_eim_omp.cpp +++ b/src/USER-OMP/pair_eim_omp.cpp @@ -28,7 +28,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- 
*/ PairEIMOMP::PairEIMOMP(LAMMPS *lmp) : - PairEIM(lmp), ThrOMP(lmp, PAIR) + PairEIM(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -39,7 +39,6 @@ void PairEIMOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; const int nall = atom->nlocal + atom->nghost; @@ -58,46 +57,39 @@ void PairEIMOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, *rho_t, *fp_t; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - if (force->newton_pair) { - rho_t = rho + tid*nall; - fp_t = fp + tid*nall; - } else { - rho_t = rho + tid*atom->nlocal; - fp_t = fp + tid*atom->nlocal; - } + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (force->newton_pair) + thr->init_eim(nall, rho, fp); + else + thr->init_eim(atom->nlocal, rho, fp); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, rho_t, fp_t, ifrom, ito, tid); - else eval<1,1,0>(f, rho_t, fp_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, rho_t, fp_t, ifrom, ito, tid); - else eval<1,0,0>(f, rho_t, fp_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, rho_t, fp_t, ifrom, ito, tid); - else eval<0,0,0>(f, rho_t, fp_t, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, - int iifrom, int iito, int tid) +void PairEIMOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,m,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -107,10 +99,17 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - int nall = nlocal + atom->nghost; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const rho_t = thr->get_rho(); + double * const fp_t = thr->get_fp(); + const int tid = thr->get_tid(); + const int nthreads = comm->nthreads; + + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; double fxtmp,fytmp,fztmp; @@ -118,16 +117,6 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, numneigh = list->numneigh; firstneigh = list->firstneigh; - // zero out density and fp - - if (NEWTON_PAIR) { - memset(rho_t, 0, nall*sizeof(double)); - memset(fp_t, 0, nall*sizeof(double)); - } else { - memset(rho_t, 0, nlocal*sizeof(double)); - memset(fp_t, 0, nlocal*sizeof(double)); - } - // rho = density at each atom // loop over neighbors of my atoms @@ -171,7 +160,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, // communicate and sum densities if (NEWTON_PAIR) { // reduce per thread density - data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid); + data_reduce_thr(rho, nall, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -185,7 +174,7 @@ void PairEIMOMP::eval(double **f, double 
*rho_t, double *fp_t, } } else { - data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid); + data_reduce_thr(rho, nlocal, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -243,7 +232,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, // communicate and sum modified densities if (NEWTON_PAIR) { // reduce per thread density - data_reduce_thr(&(fp[0]), nall, comm->nthreads, 1, tid); + data_reduce_thr(fp, nall, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -257,7 +246,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, } } else { - data_reduce_thr(&(fp[0]), nlocal, comm->nthreads, 1, tid); + data_reduce_thr(fp, nlocal, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -279,8 +268,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, itype = type[i]; if (EFLAG) { phi = 0.5*rho[i]*fp[i]; - if (eflag_global) eng_vdwl_thr[tid] += phi; - if (eflag_atom) eatom_thr[tid][i] += phi; + e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr); } } @@ -345,7 +333,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, if (EFLAG) evdwl = phi-q0[itype]*q0[jtype]*coul; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_eim_omp.h b/src/USER-OMP/pair_eim_omp.h index 3693492e09158492c302d0a71a21c0a4036dbeab..ad273e28eb480c5f6f38ab80f70b2b9f96c6a910 100644 --- a/src/USER-OMP/pair_eim_omp.h +++ b/src/USER-OMP/pair_eim_omp.h @@ -39,7 +39,7 @@ class PairEIMOMP : public PairEIM, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, double *rho_t, double *fp_t, int iifrom, int iito, int tid); + void eval(int iifrom, int iito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_gauss_omp.cpp b/src/USER-OMP/pair_gauss_omp.cpp index 
e8b255d0b78ae9641f3ff27f13eee61df8fad1cc..4f26670715d86d0b635c7063340554d59880a1ff 100644 --- a/src/USER-OMP/pair_gauss_omp.cpp +++ b/src/USER-OMP/pair_gauss_omp.cpp @@ -26,7 +26,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairGaussOMP::PairGaussOMP(LAMMPS *lmp) : - PairGauss(lmp), ThrOMP(lmp, PAIR) + PairGauss(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -37,46 +37,44 @@ void PairGaussOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; + double occ = 0.0; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) reduction(+:occ) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) occ = eval<1,1,1>(ifrom, ito, thr); + else occ = eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) occ = eval<1,0,1>(ifrom, ito, thr); + else occ = eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) occ = eval<0,0,1>(ifrom, ito, thr); + else occ = eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); + if (eflag_global) pvector[0] = occ; } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairGaussOMP::eval(double **f, int iifrom, int iito, int tid) +double PairGaussOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -86,10 +84,11 @@ void PairGaussOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -149,14 +148,14 @@ void PairGaussOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } - if (eflag_global) pvector[0] = occ; + return occ; } /* ---------------------------------------------------------------------- */ diff --git a/src/USER-OMP/pair_gauss_omp.h b/src/USER-OMP/pair_gauss_omp.h index 7f8fc9a85b091c31415f9e28657d772e21ccaf34..81d9d0ce3f2484847b2cdff9685fd18576c16fcc 100644 --- a/src/USER-OMP/pair_gauss_omp.h +++ b/src/USER-OMP/pair_gauss_omp.h @@ -39,7 +39,7 @@ class PairGaussOMP : public PairGauss, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + double eval(int ifrom, int ito, ThrData * const thr); }; } diff --git 
a/src/USER-OMP/pair_gayberne_omp.cpp b/src/USER-OMP/pair_gayberne_omp.cpp index ff115e8ef775a01a22c08fa5159b0c14a91ded23..d8ec6c9b3236939f20df2867f698426b60530e4f 100644 --- a/src/USER-OMP/pair_gayberne_omp.cpp +++ b/src/USER-OMP/pair_gayberne_omp.cpp @@ -27,7 +27,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairGayBerneOMP::PairGayBerneOMP(LAMMPS *lmp) : - PairGayBerne(lmp), ThrOMP(lmp, PAIR) + PairGayBerne(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -38,7 +38,6 @@ void PairGayBerneOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -46,40 +45,34 @@ void PairGayBerneOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, **torque; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - torque = atom->torque + tid*nall; + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid); - else eval<1,1,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid); - else eval<1,0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid); - else eval<0,0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces and torques into global arrays. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); - data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int tid) +void PairGayBerneOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj; @@ -88,11 +81,13 @@ void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int t int *ilist,*jlist,*numneigh,**firstneigh; double *iquat,*jquat; - double **x = atom->x; - int *ellipsoid = atom->ellipsoid; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const tor = thr->get_torque(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + const int * const ellipsoid = atom->ellipsoid; AtomVecEllipsoid::Bonus *bonus = avec->bonus; @@ -108,6 +103,7 @@ void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int t i = ilist[ii]; itype = type[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; if (form[itype][itype] == ELLIPSE_ELLIPSE) { iquat = bonus[ellipsoid[i]].quat; @@ -187,12 +183,12 @@ void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int t ttor[1] *= factor_lj; ttor[2] *= factor_lj; - f[i][0] += fforce[0]; - f[i][1] += fforce[1]; - f[i][2] += fforce[2]; - tor[i][0] += ttor[0]; - tor[i][1] += ttor[1]; - tor[i][2] += ttor[2]; + fxtmp += fforce[0]; + fytmp += fforce[1]; + fztmp += fforce[2]; + t1tmp += ttor[0]; + t2tmp += ttor[1]; + t3tmp += ttor[2]; if 
(NEWTON_PAIR || j < nlocal) { rtor[0] *= factor_lj; @@ -210,9 +206,15 @@ void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int t if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, evdwl,0.0,fforce[0],fforce[1],fforce[2], - -r12[0],-r12[1],-r12[2],tid); + -r12[0],-r12[1],-r12[2],thr); } } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + tor[i][0] += t1tmp; + tor[i][1] += t2tmp; + tor[i][2] += t3tmp; } } diff --git a/src/USER-OMP/pair_gayberne_omp.h b/src/USER-OMP/pair_gayberne_omp.h index 737b4ec67de0868f4ab394e21683852b59f5ed9d..0bd0b8b086464597e51d1f18fb557a16046d1238 100644 --- a/src/USER-OMP/pair_gayberne_omp.h +++ b/src/USER-OMP/pair_gayberne_omp.h @@ -39,7 +39,7 @@ class PairGayBerneOMP : public PairGayBerne, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, double **torque, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.cpp b/src/USER-OMP/pair_gran_hertz_history_omp.cpp index 1866833afed41263a14967426188acb1cb9f208b..23b8b8f5c20c99bf720b057772fb1b0e5b1aa01a 100644 --- a/src/USER-OMP/pair_gran_hertz_history_omp.cpp +++ b/src/USER-OMP/pair_gran_hertz_history_omp.cpp @@ -26,7 +26,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairGranHertzHistoryOMP::PairGranHertzHistoryOMP(LAMMPS *lmp) : - PairGranHertzHistory(lmp), ThrOMP(lmp, PAIR) + PairGranHertzHistory(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -37,7 +37,6 @@ void PairGranHertzHistoryOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int shearupdate = (update->ntimestep > laststep) ? 
1 : 0; @@ -47,35 +46,29 @@ void PairGranHertzHistoryOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, **torque; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - torque = atom->torque + tid*nall; + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) - if (shearupdate) eval<1,1>(f, torque, ifrom, ito, tid); - else eval<1,0>(f, torque, ifrom, ito, tid); + if (shearupdate) eval<1,1>(ifrom, ito, thr); + else eval<1,0>(ifrom, ito, thr); else - if (shearupdate) eval<0,1>(f, torque, ifrom, ito, tid); - else eval<0,0>(f, torque, ifrom, ito, tid); + if (shearupdate) eval<0,1>(ifrom, ito, thr); + else eval<0,0>(ifrom, ito, thr); - // reduce per thread forces and torque into global arrays. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); - data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - laststep = update->ntimestep; } template <int EVFLAG, int SHEARUPDATE> -void PairGranHertzHistoryOMP::eval(double **f, double **torque, int iifrom, int iito, int tid) +void PairGranHertzHistoryOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz; @@ -90,15 +83,17 @@ void PairGranHertzHistoryOMP::eval(double **f, double **torque, int iifrom, int int *touch,**firsttouch; double *shear,*allshear,**firstshear; - double **x = atom->x; - double **v = atom->v; - double **omega = atom->omega; - double *radius = atom->radius; - double *rmass = atom->rmass; - double *mass = atom->mass; - int *type = atom->type; - int *mask = atom->mask; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + const double * const * const v = atom->v; + const double * const * const omega = atom->omega; + const double * const radius = atom->radius; + const double * const rmass = atom->rmass; + const double * const mass = atom->mass; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const int * const type = atom->type; + const int * const mask = atom->mask; + const int nlocal = atom->nlocal; double fxtmp,fytmp,fztmp; double t1tmp,t2tmp,t3tmp; @@ -274,7 +269,7 @@ void PairGranHertzHistoryOMP::eval(double **f, double **torque, int iifrom, int } if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,/* newton_pair */ 0, - 0.0,0.0,fx,fy,fz,delx,dely,delz,tid); + 0.0,0.0,fx,fy,fz,delx,dely,delz,thr); } } diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.h b/src/USER-OMP/pair_gran_hertz_history_omp.h index 66d7bc0fa5e7c594887d10d663ed81c3efe5acf8..956e057093f0acf6b87b814a8adfae901e64a1c2 100644 --- a/src/USER-OMP/pair_gran_hertz_history_omp.h +++ b/src/USER-OMP/pair_gran_hertz_history_omp.h @@ -39,7 +39,7 @@ class PairGranHertzHistoryOMP : public PairGranHertzHistory, public ThrOMP { private: template <int EVFLAG, int 
SHEARUPDATE> - void eval(double **f, double **torque, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.cpp b/src/USER-OMP/pair_gran_hooke_history_omp.cpp index ad0537b516a437895857176dbd6508a095c6bb12..5212b30ce296883cf605463a856d6ba5c8c753ec 100644 --- a/src/USER-OMP/pair_gran_hooke_history_omp.cpp +++ b/src/USER-OMP/pair_gran_hooke_history_omp.cpp @@ -28,7 +28,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairGranHookeHistoryOMP::PairGranHookeHistoryOMP(LAMMPS *lmp) : - PairGranHookeHistory(lmp), ThrOMP(lmp, PAIR) + PairGranHookeHistory(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; // trigger use of OpenMP version of FixShearHistory @@ -42,7 +42,6 @@ void PairGranHookeHistoryOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int shearupdate = (update->ntimestep > laststep) ? 
1 : 0; @@ -52,38 +51,33 @@ void PairGranHookeHistoryOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, **torque; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - torque = atom->torque + tid*nall; + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) - if (shearupdate) eval<1,1>(f, torque, ifrom, ito, tid); - else eval<1,0>(f, torque, ifrom, ito, tid); + if (shearupdate) eval<1,1>(ifrom, ito, thr); + else eval<1,0>(ifrom, ito, thr); else - if (shearupdate) eval<0,1>(f, torque, ifrom, ito, tid); - else eval<0,0>(f, torque, ifrom, ito, tid); + if (shearupdate) eval<0,1>(ifrom, ito, thr); + else eval<0,0>(ifrom, ito, thr); - // reduce per thread forces and torque into global arrays. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); - data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - laststep = update->ntimestep; } template <int EVFLAG, int SHEARUPDATE> -void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int iito, int tid) +void PairGranHookeHistoryOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz; + double myshear[3]; double radi,radj,radsum,rsq,r,rinv,rsqinv; double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; double wr1,wr2,wr3; @@ -95,15 +89,17 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int int *touch,**firsttouch; double *shear,*allshear,**firstshear; - double **x = atom->x; - double **v = atom->v; - double **omega = atom->omega; - double *radius = atom->radius; - double *rmass = atom->rmass; - double *mass = atom->mass; - int *type = atom->type; - int *mask = atom->mask; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + const double * const * const v = atom->v; + const double * const * const omega = atom->omega; + const double * const radius = atom->radius; + const double * const rmass = atom->rmass; + const double * const mass = atom->mass; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const int * const type = atom->type; + const int * const mask = atom->mask; + const int nlocal = atom->nlocal; double fxtmp,fytmp,fztmp; double t1tmp,t2tmp,t3tmp; @@ -144,10 +140,9 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int // unset non-touching neighbors touch[jj] = 0; - shear = &allshear[3*jj]; - shear[0] = 0.0; - shear[1] = 0.0; - shear[2] = 0.0; + myshear[0] = 0.0; + myshear[1] = 0.0; + myshear[2] = 0.0; } else { r = sqrt(rsq); @@ -186,7 +181,6 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int if (mask[i] & freeze_group_bit) meff = rmass[j]; if (mask[j] & freeze_group_bit) meff = rmass[i]; } else { - itype = type[i]; jtype = 
type[j]; meff = mass[itype]*mass[jtype] / (mass[itype]+mass[jtype]); if (mask[i] & freeze_group_bit) meff = mass[jtype]; @@ -207,31 +201,31 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int // shear history effects touch[jj] = 1; - shear = &allshear[3*jj]; + memcpy(myshear,allshear + 3*jj, 3*sizeof(double)); if (SHEARUPDATE) { - shear[0] += vtr1*dt; - shear[1] += vtr2*dt; - shear[2] += vtr3*dt; + myshear[0] += vtr1*dt; + myshear[1] += vtr2*dt; + myshear[2] += vtr3*dt; } - shrmag = sqrt(shear[0]*shear[0] + shear[1]*shear[1] + - shear[2]*shear[2]); + shrmag = sqrt(myshear[0]*myshear[0] + myshear[1]*myshear[1] + + myshear[2]*myshear[2]); // rotate shear displacements - rsht = shear[0]*delx + shear[1]*dely + shear[2]*delz; + rsht = myshear[0]*delx + myshear[1]*dely + myshear[2]*delz; rsht *= rsqinv; if (SHEARUPDATE) { - shear[0] -= rsht*delx; - shear[1] -= rsht*dely; - shear[2] -= rsht*delz; + myshear[0] -= rsht*delx; + myshear[1] -= rsht*dely; + myshear[2] -= rsht*delz; } // tangential forces = shear + tangential velocity damping - fs1 = - (kt*shear[0] + meff*gammat*vtr1); - fs2 = - (kt*shear[1] + meff*gammat*vtr2); - fs3 = - (kt*shear[2] + meff*gammat*vtr3); + fs1 = - (kt*myshear[0] + meff*gammat*vtr1); + fs2 = - (kt*myshear[1] + meff*gammat*vtr2); + fs3 = - (kt*myshear[2] + meff*gammat*vtr3); // rescale frictional displacements and forces if needed @@ -242,9 +236,9 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int if (shrmag != 0.0) { const double fnfs = fn/fs; const double mgkt = meff*gammat/kt; - shear[0] = fnfs * (shear[0] + mgkt*vtr1) - mgkt*vtr1; - shear[1] = fnfs * (shear[1] + mgkt*vtr2) - mgkt*vtr2; - shear[2] = fnfs * (shear[2] + mgkt*vtr3) - mgkt*vtr3; + myshear[0] = fnfs * (myshear[0] + mgkt*vtr1) - mgkt*vtr1; + myshear[1] = fnfs * (myshear[1] + mgkt*vtr2) - mgkt*vtr2; + myshear[2] = fnfs * (myshear[2] + mgkt*vtr3) - mgkt*vtr3; fs1 *= fnfs; fs2 *= fnfs; fs3 *= fnfs; @@ -277,9 +271,10 @@ void 
PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int } if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,/* newton_pair */ 0, - 0.0,0.0,fx,fy,fz,delx,dely,delz,tid); + 0.0,0.0,fx,fy,fz,delx,dely,delz,thr); } + memcpy(allshear + 3*jj, myshear, 3*sizeof(double)); } f[i][0] += fxtmp; f[i][1] += fytmp; diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.h b/src/USER-OMP/pair_gran_hooke_history_omp.h index 33325025fcce2c14628e926f01516b7637ddb654..7588469e744408815af12be7f0e1b97ac24d54a2 100644 --- a/src/USER-OMP/pair_gran_hooke_history_omp.h +++ b/src/USER-OMP/pair_gran_hooke_history_omp.h @@ -39,7 +39,7 @@ class PairGranHookeHistoryOMP : public PairGranHookeHistory, public ThrOMP { private: template <int EVFLAG, int SHEARUPDATE> - void eval(double **f, double **torque, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_gran_hooke_omp.cpp b/src/USER-OMP/pair_gran_hooke_omp.cpp index d6991fa453e25ef8a79c68483c0129fc8b38c76a..fda9295b7078a82f105aade025b0fc3b2769a7c1 100644 --- a/src/USER-OMP/pair_gran_hooke_omp.cpp +++ b/src/USER-OMP/pair_gran_hooke_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairGranHookeOMP::PairGranHookeOMP(LAMMPS *lmp) : - PairGranHooke(lmp), ThrOMP(lmp, PAIR) + PairGranHooke(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairGranHookeOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,33 +43,28 @@ void PairGranHookeOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, **torque; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - 
torque = atom->torque + tid*nall; + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) - if (force->newton_pair) eval<1,1>(f, torque, ifrom, ito, tid); - else eval<1,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,1>(ifrom, ito, thr); + else eval<1,0>(ifrom, ito, thr); else - if (force->newton_pair) eval<0,1>(f, torque, ifrom, ito, tid); - else eval<0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<0,1>(ifrom, ito, thr); + else eval<0,0>(ifrom, ito, thr); - // reduce per thread forces and torque into global arrays. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); - data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); } template <int EVFLAG, int NEWTON_PAIR> -void PairGranHookeOMP::eval(double **f, double **torque, int iifrom, int iito, int tid) +void PairGranHookeOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz; @@ -82,15 +76,17 @@ void PairGranHookeOMP::eval(double **f, double **torque, int iifrom, int iito, i double fn,fs,ft,fs1,fs2,fs3; int *ilist,*jlist,*numneigh,**firstneigh; - double **x = atom->x; - double **v = atom->v; - double **omega = atom->omega; - double *radius = atom->radius; - double *rmass = atom->rmass; - double *mass = atom->mass; - int *type = atom->type; - int *mask = atom->mask; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + const double * const * const v = atom->v; + const double * const * const omega = atom->omega; + const double * const radius = atom->radius; + const double * const rmass = atom->rmass; + const double * const mass = atom->mass; + double * const * const f = thr->get_f(); + double * const * const 
torque = thr->get_torque(); + const int * const type = atom->type; + const int * const mask = atom->mask; + const int nlocal = atom->nlocal; double fxtmp,fytmp,fztmp; double t1tmp,t2tmp,t3tmp; @@ -216,7 +212,7 @@ void PairGranHookeOMP::eval(double **f, double **torque, int iifrom, int iito, i } if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, - 0.0,0.0,fx,fy,fz,delx,dely,delz,tid); + 0.0,0.0,fx,fy,fz,delx,dely,delz,thr); } } diff --git a/src/USER-OMP/pair_gran_hooke_omp.h b/src/USER-OMP/pair_gran_hooke_omp.h index f2b093778c3f675b376b3cdc8f93d0ed9cbbf555..b275992bfacd096687147ec575bcf43596f88003 100644 --- a/src/USER-OMP/pair_gran_hooke_omp.h +++ b/src/USER-OMP/pair_gran_hooke_omp.h @@ -39,7 +39,7 @@ class PairGranHookeOMP : public PairGranHooke, public ThrOMP { private: template <int EVFLAG, int NEWTON_PAIR> - void eval(double **f, double **torque, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp index 012fd596b3998d4e13c8b31bb05c908dc3cfad56..5da3f2bdfa4c9e1a2d4fc42884856cda760512ce 100644 --- a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp +++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp @@ -31,7 +31,7 @@ using namespace MathConst; /* ---------------------------------------------------------------------- */ PairHbondDreidingLJOMP::PairHbondDreidingLJOMP(LAMMPS *lmp) : - PairHbondDreidingLJ(lmp), ThrOMP(lmp, PAIR) + PairHbondDreidingLJ(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; hbcount_thr = hbeng_thr = NULL; @@ -54,7 +54,6 @@ void PairHbondDreidingLJOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -72,35 +71,31 @@ void PairHbondDreidingLJOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) 
shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); - // reduce per thread hbond data if (eflag_global) { pvector[0] = 0.0; @@ -113,25 +108,26 @@ void PairHbondDreidingLJOMP::compute(int eflag, int vflag) } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairHbondDreidingLJOMP::eval(double **f, int iifrom, int iito, int tid) +void PairHbondDreidingLJOMP::eval(int iifrom, int iito, ThrData * const thr) { - int i,j,k,m,ii,jj,kk,jnum,knum,itype,jtype,ktype; + int i,j,k,m,ii,jj,kk,jnum,itype,jtype,ktype; double xtmp,ytmp,ztmp,delx,dely,delz,rsq,rsq1,rsq2,r1,r2; double factor_hb,force_angle,force_kernel,evdwl,eng_lj; double c,s,a,b,ac,a11,a12,a22,vx1,vx2,vy1,vy2,vz1,vz2; double fi[3],fj[3],delr1[3],delr2[3]; double r2inv,r10inv; double switch1,switch2; - int *ilist,*jlist,*klist,*numneigh,**firstneigh; + int *ilist,*jlist,*numneigh,**firstneigh; Param *pm; evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int **special = atom->special; - int **nspecial = atom->nspecial; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const double * const special_lj = force->special_lj; + const int * const * const nspecial = atom->nspecial; + const int * const * const special = atom->special; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -152,8 +148,8 @@ void PairHbondDreidingLJOMP::eval(double **f, int iifrom, int iito, int tid) itype = type[i]; if (!donor[itype]) continue; - klist = special[i]; - knum = nspecial[i][0]; + const int * const klist = special[i]; + const int knum = nspecial[i][0]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; @@ -270,7 +266,7 @@ void PairHbondDreidingLJOMP::eval(double **f, int iifrom, int iito, int tid) // KIJ instead of IJK b/c delr1/delr2 are both with respect to k - if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,tid); + if (EVFLAG) 
ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,thr); if (EFLAG) { hbcount++; hbeng += evdwl; @@ -283,6 +279,7 @@ void PairHbondDreidingLJOMP::eval(double **f, int iifrom, int iito, int tid) f[i][1] += fytmp; f[i][2] += fztmp; } + const int tid = thr->get_tid(); hbcount_thr[tid] = static_cast<double>(hbcount); hbeng_thr[tid] = hbeng; } diff --git a/src/USER-OMP/pair_hbond_dreiding_lj_omp.h b/src/USER-OMP/pair_hbond_dreiding_lj_omp.h index 1aef78490c2173fdb70a357317cb4a962641f7aa..937391684967734441ea04df84e3782b5f41d55b 100644 --- a/src/USER-OMP/pair_hbond_dreiding_lj_omp.h +++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.h @@ -43,7 +43,7 @@ class PairHbondDreidingLJOMP : public PairHbondDreidingLJ, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp index b6c966f8c7d53dcf32325e971f40972f06a23220..bce4efdd3a00a6a7dbda1ec703b95af0fbba6cc7 100644 --- a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp +++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp @@ -31,7 +31,7 @@ using namespace MathConst; /* ---------------------------------------------------------------------- */ PairHbondDreidingMorseOMP::PairHbondDreidingMorseOMP(LAMMPS *lmp) : - PairHbondDreidingMorse(lmp), ThrOMP(lmp, PAIR) + PairHbondDreidingMorse(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; hbcount_thr = hbeng_thr = NULL; @@ -54,7 +54,6 @@ void PairHbondDreidingMorseOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -72,35 +71,31 @@ void PairHbondDreidingMorseOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int 
ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); - // reduce per thread hbond data if (eflag_global) { pvector[0] = 0.0; @@ -113,24 +108,25 @@ void PairHbondDreidingMorseOMP::compute(int eflag, int vflag) } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid) +void PairHbondDreidingMorseOMP::eval(int iifrom, int iito, ThrData * const thr) { - int i,j,k,m,ii,jj,kk,jnum,knum,itype,jtype,ktype; + int i,j,k,m,ii,jj,kk,jnum,itype,jtype,ktype; double xtmp,ytmp,ztmp,delx,dely,delz,rsq,rsq1,rsq2,r1,r2; double factor_hb,force_angle,force_kernel,evdwl; double c,s,a,b,ac,a11,a12,a22,vx1,vx2,vy1,vy2,vz1,vz2; double fi[3],fj[3],delr1[3],delr2[3]; double r,dr,dexp,eng_morse,switch1,switch2; - int *ilist,*jlist,*klist,*numneigh,**firstneigh; + int *ilist,*jlist,*numneigh,**firstneigh; Param *pm; evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int **special = atom->special; - int **nspecial = atom->nspecial; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const double * const special_lj = force->special_lj; + const int * const * const nspecial = atom->nspecial; + const int * const * const special = atom->special; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -151,8 +147,8 @@ void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid) itype = type[i]; if (!donor[itype]) continue; - klist = special[i]; - knum = nspecial[i][0]; + const int * const klist = special[i]; + const int knum = nspecial[i][0]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; @@ -268,7 +264,7 @@ void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid) // KIJ instead of IJK b/c delr1/delr2 are both with respect to k - if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,tid); + if (EVFLAG) 
ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,thr); if (EFLAG) { hbcount++; hbeng += evdwl; @@ -281,6 +277,7 @@ void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid) f[i][1] += fytmp; f[i][2] += fztmp; } + const int tid = thr->get_tid(); hbcount_thr[tid] = static_cast<double>(hbcount); hbeng_thr[tid] = hbeng; } diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.h b/src/USER-OMP/pair_hbond_dreiding_morse_omp.h index 2a13c618c6eb92ec07844f84a5e46c18e660ee0b..d2edd7281b63d7da1800623e46b6792453f30e3c 100644 --- a/src/USER-OMP/pair_hbond_dreiding_morse_omp.h +++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.h @@ -43,7 +43,7 @@ class PairHbondDreidingMorseOMP : public PairHbondDreidingMorse, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj96_cut_omp.cpp b/src/USER-OMP/pair_lj96_cut_omp.cpp index f0998363e1d0140c75e57e75d14e54da9dc44c30..68733c10939b3b2b3b31ead51e7b8379479f2d70 100644 --- a/src/USER-OMP/pair_lj96_cut_omp.cpp +++ b/src/USER-OMP/pair_lj96_cut_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJ96CutOMP::PairLJ96CutOMP(LAMMPS *lmp) : - PairLJ96Cut(lmp), ThrOMP(lmp, PAIR) + PairLJ96Cut(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJ96CutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairLJ96CutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); 
+ loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJ96CutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJ96CutOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -84,10 +79,11 @@ void PairLJ96CutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -141,8 +137,8 @@ void PairLJ96CutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj96_cut_omp.h b/src/USER-OMP/pair_lj96_cut_omp.h index 333212303daed1bf9ea9cb76f8b5d7a9658eb32e..a8040320c8a01f63256ba693665204c261191608 100644 --- a/src/USER-OMP/pair_lj96_cut_omp.h +++ b/src/USER-OMP/pair_lj96_cut_omp.h @@ -39,7 +39,7 @@ class PairLJ96CutOMP : public PairLJ96Cut, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp index 32ad05acdacea51a7e35a0b5435ff2aa19d1dd76..edfbe1f527c2c7e3ea7b83404cd567831a24a23f 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp +++ 
b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCharmmCoulCharmmImplicitOMP::PairLJCharmmCoulCharmmImplicitOMP(LAMMPS *lmp) : - PairLJCharmmCoulCharmmImplicit(lmp), ThrOMP(lmp, PAIR) + PairLJCharmmCoulCharmmImplicit(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJCharmmCoulCharmmImplicitOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,64 +43,60 @@ void PairLJCharmmCoulCharmmImplicitOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCharmmCoulCharmmImplicitOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCharmmCoulCharmmImplicitOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; double philj,switch1,switch2; - double invdenom_coul,invdenom_lj; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; - invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0; - invdenom_lj = (denom_lj != 0.0) ? 1.0/denom_lj : 0.0; + const double invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0; + const double invdenom_lj = (denom_lj != 0.0) ? 
1.0/denom_lj : 0.0; // loop over neighbors of my atoms @@ -193,7 +188,7 @@ void PairLJCharmmCoulCharmmImplicitOMP::eval(double **f, int iifrom, int iito, i } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h index ba016d7d3dcded9f308ed37ea17baefeffe3ad6d..dff01ce499b724a3143a3553c2d5e0c37e890834 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h @@ -39,7 +39,7 @@ class PairLJCharmmCoulCharmmImplicitOMP : public PairLJCharmmCoulCharmmImplicit, private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp index 6dac7a17f6496f224ff4aced8043be99a980a664..efdcc995dad9ac4ddb132678b59b1b7c35901672 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCharmmCoulCharmmOMP::PairLJCharmmCoulCharmmOMP(LAMMPS *lmp) : - PairLJCharmmCoulCharmm(lmp), ThrOMP(lmp, PAIR) + PairLJCharmmCoulCharmm(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJCharmmCoulCharmmOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,64 +43,60 @@ void PairLJCharmmCoulCharmmOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) 
shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCharmmCoulCharmmOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCharmmCoulCharmmOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; double philj,switch1,switch2; - double invdenom_coul,invdenom_lj; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; - invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0; - invdenom_lj = (denom_lj != 0.0) ? 1.0/denom_lj : 0.0; + const double invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0; + const double invdenom_lj = (denom_lj != 0.0) ? 
1.0/denom_lj : 0.0; // loop over neighbors of my atoms @@ -193,7 +188,7 @@ void PairLJCharmmCoulCharmmOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h index f2889b05fea51939c15c211459b7a78702c89b3b..0eda030ebd866631a574ecaf468bd8c4b6f56ca3 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h @@ -39,7 +39,7 @@ class PairLJCharmmCoulCharmmOMP : public PairLJCharmmCoulCharmm, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp index c99f27f2e12a4867c47c239ea7baa9b1f06ed7e5..f9f32ea119fd5ebefed01f6b72db7f54d8039435 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp +++ b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp @@ -33,7 +33,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCharmmCoulLongOMP::PairLJCharmmCoulLongOMP(LAMMPS *lmp) : - PairLJCharmmCoulLong(lmp), ThrOMP(lmp, PAIR) + PairLJCharmmCoulLong(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -44,7 +44,6 @@ void PairLJCharmmCoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -52,40 +51,36 @@ void PairLJCharmmCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double 
**f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCharmmCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCharmmCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype,itable; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -97,13 +92,14 @@ void PairLJCharmmCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -214,7 +210,7 @@ void PairLJCharmmCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_omp.h b/src/USER-OMP/pair_lj_charmm_coul_long_omp.h index b14e4c1fe48f8c956e76d4daa2040fd1ab2e0c60..91b9c01c1ab4de7aeac310d3866bbcb174a83d88 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_long_omp.h +++ b/src/USER-OMP/pair_lj_charmm_coul_long_omp.h @@ -39,7 +39,7 @@ class PairLJCharmmCoulLongOMP : public PairLJCharmmCoulLong, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int 
ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp index 032188279321e135718371aa9e3dc4cf6b1ec718..e54c348e64d365aa72390f590f8fa58209715d5c 100644 --- a/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJClass2CoulCutOMP::PairLJClass2CoulCutOMP(LAMMPS *lmp) : - PairLJClass2CoulCut(lmp), ThrOMP(lmp, PAIR) + PairLJClass2CoulCut(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJClass2CoulCutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,36 @@ void PairLJClass2CoulCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces 
into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJClass2CoulCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJClass2CoulCutOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -87,13 +82,14 @@ void PairLJClass2CoulCutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -163,9 +159,9 @@ void PairLJClass2CoulCutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } else evdwl = 0.0; } - + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_class2_coul_cut_omp.h b/src/USER-OMP/pair_lj_class2_coul_cut_omp.h index 5fe4895691603381ce33e167f2709c275fab86bc..b22a29aa184038007f0b1666b90a316d9c4cafc1 100644 --- a/src/USER-OMP/pair_lj_class2_coul_cut_omp.h +++ 
b/src/USER-OMP/pair_lj_class2_coul_cut_omp.h @@ -39,7 +39,7 @@ class PairLJClass2CoulCutOMP : public PairLJClass2CoulCut, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp index 84d26ceb147dfde49aef26a14e0af75d652f8df5..20ad947d23f93cd01009e08b5ec33346236c7d7b 100644 --- a/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp +++ b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp @@ -33,7 +33,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJClass2CoulLongOMP::PairLJClass2CoulLongOMP(LAMMPS *lmp) : - PairLJClass2CoulLong(lmp), ThrOMP(lmp, PAIR) + PairLJClass2CoulLong(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -44,7 +44,6 @@ void PairLJClass2CoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -52,40 +51,36 @@ void PairLJClass2CoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) 
eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJClass2CoulLongOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJClass2CoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -95,13 +90,14 @@ void PairLJClass2CoulLongOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -181,7 +177,7 @@ void PairLJClass2CoulLongOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git 
a/src/USER-OMP/pair_lj_class2_coul_long_omp.h b/src/USER-OMP/pair_lj_class2_coul_long_omp.h index da4ac3680f6262f04bd5f85e83ec2ce5d50f552c..b32799bf8494ae128a65ea6448e7fd3c08ee59c0 100644 --- a/src/USER-OMP/pair_lj_class2_coul_long_omp.h +++ b/src/USER-OMP/pair_lj_class2_coul_long_omp.h @@ -39,7 +39,7 @@ class PairLJClass2CoulLongOMP : public PairLJClass2CoulLong, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_class2_omp.cpp b/src/USER-OMP/pair_lj_class2_omp.cpp index 4f5d2550fc5862d6eeeadcc73fed23d2ae02e938..cff80d3f1d5de18acddcebbefea65204b6621c0c 100644 --- a/src/USER-OMP/pair_lj_class2_omp.cpp +++ b/src/USER-OMP/pair_lj_class2_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJClass2OMP::PairLJClass2OMP(LAMMPS *lmp) : - PairLJClass2(lmp), ThrOMP(lmp, PAIR) + PairLJClass2(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJClass2OMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairLJClass2OMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, 
thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJClass2OMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJClass2OMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -84,10 +79,11 @@ void PairLJClass2OMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -141,8 +137,8 @@ void PairLJClass2OMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_class2_omp.h b/src/USER-OMP/pair_lj_class2_omp.h index cfe24bb714fb37a94436e2ec5099ffabaf579da6..317c7376c5210c62d7d93ab5a1e65cd702f82923 
100644 --- a/src/USER-OMP/pair_lj_class2_omp.h +++ b/src/USER-OMP/pair_lj_class2_omp.h @@ -39,7 +39,7 @@ class PairLJClass2OMP : public PairLJClass2, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_coul_omp.cpp b/src/USER-OMP/pair_lj_coul_omp.cpp index 23e2a8d906372f36dc8b8a2690421e4432fac1ec..ae15087ba90f9aec9934dafbdf0d3175f9a88da5 100644 --- a/src/USER-OMP/pair_lj_coul_omp.cpp +++ b/src/USER-OMP/pair_lj_coul_omp.cpp @@ -34,7 +34,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCoulOMP::PairLJCoulOMP(LAMMPS *lmp) : - PairLJCoul(lmp), ThrOMP(lmp, PAIR) + PairLJCoul(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -45,7 +45,6 @@ void PairLJCoulOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -53,53 +52,50 @@ void PairLJCoulOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if 
(force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCoulOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCoulOMP::eval(int iifrom, int iito, ThrData * const thr) { double evdwl,ecoul,fpair; evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; - double *x0 = x[0]; + const double *x0 = x[0]; double *f0 = f[0], *fi = f0; int *ilist = list->ilist; @@ -127,7 +123,7 @@ void PairLJCoulOMP::eval(double **f, int iifrom, int iito, int tid) ni = sbmask(j); j &= NEIGHMASK; - { register double *xj = x0+(j+(j<<1)); + { register const double *xj = x0+(j+(j<<1)); d[0] = xi[0] - xj[0]; // pair vector d[1] = xi[1] - xj[1]; d[2] = xi[2] - xj[2]; } @@ -218,7 +214,7 @@ void PairLJCoulOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,d[0],d[1],d[2],tid); + 
evdwl,ecoul,fpair,d[0],d[1],d[2],thr); } } } diff --git a/src/USER-OMP/pair_lj_coul_omp.h b/src/USER-OMP/pair_lj_coul_omp.h index 619e609ba8c50cf634396155024a1f8bcd833c9e..e2259e16a0a031c1fcb63e1612aa28d7920ccadc 100644 --- a/src/USER-OMP/pair_lj_coul_omp.h +++ b/src/USER-OMP/pair_lj_coul_omp.h @@ -39,7 +39,7 @@ class PairLJCoulOMP : public PairLJCoul, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_cubic_omp.cpp b/src/USER-OMP/pair_lj_cubic_omp.cpp index 4f806bd71fe2798e411091901976de51366d9195..09e44a910776012151aab4132dd9482005bf9e32 100644 --- a/src/USER-OMP/pair_lj_cubic_omp.cpp +++ b/src/USER-OMP/pair_lj_cubic_omp.cpp @@ -26,7 +26,7 @@ using namespace PairLJCubicConstants; /* ---------------------------------------------------------------------- */ PairLJCubicOMP::PairLJCubicOMP(LAMMPS *lmp) : - PairLJCubic(lmp), ThrOMP(lmp, PAIR) + PairLJCubic(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -37,7 +37,6 @@ void PairLJCubicOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -45,38 +44,34 @@ void PairLJCubicOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } 
else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCubicOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCubicOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -86,10 +81,11 @@ void PairLJCubicOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -152,8 +148,8 @@ void PairLJCubicOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_cubic_omp.h b/src/USER-OMP/pair_lj_cubic_omp.h index 559a6125ab3719ec1c5537eaa1174b46712a21f1..a6ed7d2b97db9ce3cc23568f7f3a0e43c2549362 100644 --- 
a/src/USER-OMP/pair_lj_cubic_omp.h +++ b/src/USER-OMP/pair_lj_cubic_omp.h @@ -39,7 +39,7 @@ class PairLJCubicOMP : public PairLJCubic, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp index be98ec38fc89b4b73475495feff9faba852d34b9..46114ce6131e12359d61f5cc314eb088e73706b2 100644 --- a/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCutCoulCutOMP::PairLJCutCoulCutOMP(LAMMPS *lmp) : - PairLJCutCoulCut(lmp), ThrOMP(lmp, PAIR) + PairLJCutCoulCut(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJCutCoulCutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,36 @@ void PairLJCutCoulCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else 
eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCutCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCutCoulCutOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -86,13 +81,14 @@ void PairLJCutCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -159,11 +155,11 @@ void PairLJCutCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - offset[itype][jtype]; evdwl *= factor_lj; - } - } else evdwl = 0.0; + } else evdwl = 0.0; + } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - 
evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_omp.h b/src/USER-OMP/pair_lj_cut_coul_cut_omp.h index c8c34e2591780be3b32f2555571ef7fc073825c2..3d4be420e71cb9809a79aa0ad5edda22010ab326 100644 --- a/src/USER-OMP/pair_lj_cut_coul_cut_omp.h +++ b/src/USER-OMP/pair_lj_cut_coul_cut_omp.h @@ -39,7 +39,7 @@ class PairLJCutCoulCutOMP : public PairLJCutCoulCut, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp index 13a4a1906f9b6218f582bcc08d82a542889125da..9d96f31dba5bcb25c8e331472e661afcf88c4d59 100644 --- a/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCutCoulDebyeOMP::PairLJCutCoulDebyeOMP(LAMMPS *lmp) : - PairLJCutCoulDebye(lmp), ThrOMP(lmp, PAIR) + PairLJCutCoulDebye(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJCutCoulDebyeOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,36 @@ void PairLJCutCoulDebyeOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) 
eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCutCoulDebyeOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -87,13 +82,14 @@ void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = 
list->ilist; @@ -129,7 +125,6 @@ void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid) if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - if (rsq < cut_coulsq[itype][jtype]) { r = sqrt(rsq); rinv = 1.0/r; @@ -165,8 +160,9 @@ void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } else evdwl = 0.0; } + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_cut_coul_debye_omp.h b/src/USER-OMP/pair_lj_cut_coul_debye_omp.h index 00cf540be22cb59f2aeb2647f8286b34141967bd..e2205cb7cef1f696866704f3e772ae018c5ac014 100644 --- a/src/USER-OMP/pair_lj_cut_coul_debye_omp.h +++ b/src/USER-OMP/pair_lj_cut_coul_debye_omp.h @@ -39,7 +39,7 @@ class PairLJCutCoulDebyeOMP : public PairLJCutCoulDebye, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp index 1d8f977c9635d3dff671628957716ae629bec5e3..79976bf8a8eadfcdd1880c692ef28c69334e6bf1 100644 --- a/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp @@ -33,7 +33,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCutCoulLongOMP::PairLJCutCoulLongOMP(LAMMPS *lmp) : - PairLJCutCoulLong(lmp), ThrOMP(lmp, PAIR) + PairLJCutCoulLong(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -44,7 +44,6 @@ void PairLJCutCoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -52,40 +51,36 @@ void PairLJCutCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; 
#if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCutCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCutCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype,itable; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -96,13 +91,14 @@ void PairLJCutCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -198,9 +194,9 @@ void PairLJCutCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } else evdwl = 0.0; } - + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_cut_coul_long_omp.h b/src/USER-OMP/pair_lj_cut_coul_long_omp.h index ac408ba886e86cd7dd7514d424950462fa9075be..a907959ae3b518b598a50b41eab9a9128f986303 100644 --- a/src/USER-OMP/pair_lj_cut_coul_long_omp.h +++ b/src/USER-OMP/pair_lj_cut_coul_long_omp.h @@ -39,7 +39,7 @@ class PairLJCutCoulLongOMP : public PairLJCutCoulLong, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void 
eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp index 6ada944c53b41718eb967a81eea70d08b6217458..78f35709a24c399c277f256c0df4f9c23eccf7bc 100644 --- a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp @@ -36,7 +36,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCutCoulLongTIP4POMP::PairLJCutCoulLongTIP4POMP(LAMMPS *lmp) : - PairLJCutCoulLongTIP4P(lmp), ThrOMP(lmp, PAIR) + PairLJCutCoulLongTIP4P(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; @@ -61,7 +61,6 @@ void PairLJCutCoulLongTIP4POMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nlocal = atom->nlocal; @@ -76,8 +75,8 @@ void PairLJCutCoulLongTIP4POMP::compute(int eflag, int vflag) } // cache corrected M positions in mpos[] - double **x = atom->x; - int *type = atom->type; + const double * const * const x = atom->x; + const int * const type = atom->type; for (int i = 0; i < nlocal; i++) { if (type[i] == typeO) { find_M(i,h1idx[i],h2idx[i],mpos[i]); @@ -101,39 +100,35 @@ void PairLJCutCoulLongTIP4POMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (vflag) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (vflag) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (vflag) eval<1,0,1>(f, ifrom, ito, tid); - else 
eval<1,0,0>(f, ifrom, ito, tid); + if (vflag) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - eval<0,0,0>(f, ifrom, ito, tid); + eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int VFLAG> -void PairLJCutCoulLongTIP4POMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCutCoulLongTIP4POMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype,itable; int n,vlist[6]; @@ -151,13 +146,14 @@ void PairLJCutCoulLongTIP4POMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -216,7 +212,7 @@ void PairLJCutCoulLongTIP4POMP::eval(double **f, int iifrom, int iito, int tid) } else evdwl = 0.0; if (EVFLAG) ev_tally_thr(this,i,j,nlocal, /* newton_pair = */ 1, - evdwl,0.0,forcelj,delx,dely,delz,tid); + evdwl,0.0,forcelj,delx,dely,delz,thr); } // adjust rsq and delxyz for off-site O charge(s) @@ -423,7 +419,7 @@ void PairLJCutCoulLongTIP4POMP::eval(double **f, int 
iifrom, int iito, int tid) if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; } else ecoul = 0.0; - if (EVFLAG) ev_tally_list_thr(this,n,vlist,ecoul,v,tid); + if (EVFLAG) ev_tally_list_thr(this,n,vlist,ecoul,v,thr); } } } diff --git a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h index 093fc0216b39c0b888f85d1c8ff00e057d40fdcc..ff49bdcedb06fbf8e6b1c445ca96ee8da301d564 100644 --- a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h +++ b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h @@ -39,7 +39,6 @@ class PairLJCutCoulLongTIP4POMP : public PairLJCutCoulLongTIP4P, public ThrOMP { virtual double memory_usage(); protected: - // this is to cache m-shift corrected positions. int maxmpos; // size of the following arrays int *h1idx, *h2idx; // local index of hydrogen atoms @@ -48,7 +47,7 @@ class PairLJCutCoulLongTIP4POMP : public PairLJCutCoulLongTIP4P, public ThrOMP { private: template <int EVFLAG, int EFLAG, int VFLAG> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_omp.cpp index 3d82149fece935542ed9f1b539c1f258e29108ac..4932a784bb6857af603ce7113c45f6fef251f370 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) + PairLJCut(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJCutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairLJCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp 
parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCutOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -84,10 +79,11 @@ void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -139,8 +135,8 @@ void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_cut_omp.h b/src/USER-OMP/pair_lj_cut_omp.h index 56f9f9b8a58e4005d68f0975c563d45128f27920..f97996e4807f2165407c8acd0ec93aec3ae8834d 100644 --- a/src/USER-OMP/pair_lj_cut_omp.h +++ b/src/USER-OMP/pair_lj_cut_omp.h @@ -39,7 +39,7 @@ class PairLJCutOMP : public PairLJCut, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_expand_omp.cpp b/src/USER-OMP/pair_lj_expand_omp.cpp index 7b06503ee4a63c185fe181c38efc004b29ac0d8c..4f93d3bd4201d93c3c079ede7a983495cec1a2d2 100644 --- a/src/USER-OMP/pair_lj_expand_omp.cpp +++ b/src/USER-OMP/pair_lj_expand_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* 
---------------------------------------------------------------------- */ PairLJExpandOMP::PairLJExpandOMP(LAMMPS *lmp) : - PairLJExpand(lmp), ThrOMP(lmp, PAIR) + PairLJExpand(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJExpandOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairLJExpandOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJExpandOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJExpandOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -85,10 +80,11 @@ void PairLJExpandOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -143,8 +139,8 @@ void PairLJExpandOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_expand_omp.h b/src/USER-OMP/pair_lj_expand_omp.h index 29488deae88a5117e6afbd1aa3105493ccde9b8e..9ff8d3080a7ca516cbbfcb5b95e830b863f07551 100644 --- a/src/USER-OMP/pair_lj_expand_omp.h +++ b/src/USER-OMP/pair_lj_expand_omp.h @@ -39,7 +39,7 @@ class PairLJExpandOMP : public PairLJExpand, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp index 2e97fa1b5ead080fed6806ea9c18af4408505368..ca8875c7f8c810256e8ac4f86b6cef65f2b6a0a1 100644 --- a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp +++ 
b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJGromacsCoulGromacsOMP::PairLJGromacsCoulGromacsOMP(LAMMPS *lmp) : - PairLJGromacsCoulGromacs(lmp), ThrOMP(lmp, PAIR) + PairLJGromacsCoulGromacs(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJGromacsCoulGromacsOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,36 @@ void PairLJGromacsCoulGromacsOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJGromacsCoulGromacsOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJGromacsCoulGromacsOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -87,13 +82,14 @@ void PairLJGromacsCoulGromacsOMP::eval(double **f, int iifrom, int iito, int tid evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -190,7 +186,7 @@ void PairLJGromacsCoulGromacsOMP::eval(double **f, int iifrom, int iito, int tid } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h index d789bd6797f99809d62cdd5469f9e323bdc63328..ee506c2c4a6f2ce55eb26880be6184ed4ee00c4f 100644 --- a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h +++ b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h @@ -39,7 +39,7 @@ class PairLJGromacsCoulGromacsOMP : public PairLJGromacsCoulGromacs, public ThrO private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); 
+ void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_gromacs_omp.cpp b/src/USER-OMP/pair_lj_gromacs_omp.cpp index f1c7d2faf9450a83c9be561322796a92b31d8680..abdc4c5ccffeee0f40ee78f35bdb936a7da8c967 100644 --- a/src/USER-OMP/pair_lj_gromacs_omp.cpp +++ b/src/USER-OMP/pair_lj_gromacs_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJGromacsOMP::PairLJGromacsOMP(LAMMPS *lmp) : - PairLJGromacs(lmp), ThrOMP(lmp, PAIR) + PairLJGromacs(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJGromacsOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairLJGromacsOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJGromacsOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJGromacsOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -85,10 +80,11 @@ void PairLJGromacsOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -151,8 +147,8 @@ void PairLJGromacsOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_gromacs_omp.h b/src/USER-OMP/pair_lj_gromacs_omp.h index d192a414ef56cf6b4f27d4c969cd8eb6c06cd702..8e0f4bd2810507a4f81935240be669782a19b061 100644 --- a/src/USER-OMP/pair_lj_gromacs_omp.h +++ b/src/USER-OMP/pair_lj_gromacs_omp.h @@ -39,7 +39,7 @@ class PairLJGromacsOMP : public PairLJGromacs, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_sf_omp.cpp b/src/USER-OMP/pair_lj_sf_omp.cpp index 
55ee908e474e5ac9312769af1d390509b4449361..47cc23bf91a28728e9843d2bbcb8cdf5893a1f39 100644 --- a/src/USER-OMP/pair_lj_sf_omp.cpp +++ b/src/USER-OMP/pair_lj_sf_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJShiftedForceOMP::PairLJShiftedForceOMP(LAMMPS *lmp) : - PairLJShiftedForce(lmp), ThrOMP(lmp, PAIR) + PairLJShiftedForce(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJShiftedForceOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairLJShiftedForceOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJShiftedForceOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJShiftedForceOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -84,10 +79,11 @@ void PairLJShiftedForceOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -142,8 +138,8 @@ void PairLJShiftedForceOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_sf_omp.h b/src/USER-OMP/pair_lj_sf_omp.h index 6fba43fb8f8837551f7bb01d2050617fd20e1d73..c73c8f746b855c26d4d7af8a0c478f305948a120 100644 --- a/src/USER-OMP/pair_lj_sf_omp.h +++ b/src/USER-OMP/pair_lj_sf_omp.h @@ -39,7 +39,7 @@ class PairLJShiftedForceOMP : public PairLJShiftedForce, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_smooth_omp.cpp b/src/USER-OMP/pair_lj_smooth_omp.cpp index 
1ad88044a622c9c7a5749aaa2de7edd57581cf25..4bf9ceb41c9900bdfdf91b0cc7952263b150c3bc 100644 --- a/src/USER-OMP/pair_lj_smooth_omp.cpp +++ b/src/USER-OMP/pair_lj_smooth_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJSmoothOMP::PairLJSmoothOMP(LAMMPS *lmp) : - PairLJSmooth(lmp), ThrOMP(lmp, PAIR) + PairLJSmooth(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJSmoothOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairLJSmoothOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJSmoothOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJSmoothOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -85,10 +80,11 @@ void PairLJSmoothOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -155,8 +151,8 @@ void PairLJSmoothOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_smooth_omp.h b/src/USER-OMP/pair_lj_smooth_omp.h index de27a4008d7063a75e6e958a6805dc838ecaf13f..eb6eb92decaf544bf72338a1d9f556bb8c6eb453 100644 --- a/src/USER-OMP/pair_lj_smooth_omp.h +++ b/src/USER-OMP/pair_lj_smooth_omp.h @@ -39,7 +39,7 @@ class PairLJSmoothOMP : public PairLJSmooth, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_morse_omp.cpp b/src/USER-OMP/pair_morse_omp.cpp index 
a53e35a9775e55168a8737c19c1ceb18b6cf9621..f61fd4e3835f4285ee8ca34dff6fdd7a7a23815d 100644 --- a/src/USER-OMP/pair_morse_omp.cpp +++ b/src/USER-OMP/pair_morse_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairMorseOMP::PairMorseOMP(LAMMPS *lmp) : - PairMorse(lmp), ThrOMP(lmp, PAIR) + PairMorse(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairMorseOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairMorseOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairMorseOMP::eval(double **f, int iifrom, int iito, int tid) +void PairMorseOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -84,10 +79,11 @@ void PairMorseOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -139,8 +135,8 @@ void PairMorseOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_morse_omp.h b/src/USER-OMP/pair_morse_omp.h index a966e6f11fd8f60463e345ee88895d1070c5be76..a20aad6716cfaddce3a16a68253e83fa1daff6be 100644 --- a/src/USER-OMP/pair_morse_omp.h +++ b/src/USER-OMP/pair_morse_omp.h @@ -39,7 +39,7 @@ class PairMorseOMP : public PairMorse, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_peri_lps_omp.cpp b/src/USER-OMP/pair_peri_lps_omp.cpp index 7cb1e83086b3ea74f6029b58887db6b96e73b077..e052271e4f9af918845b365088193d89b988e521 100644 --- a/src/USER-OMP/pair_peri_lps_omp.cpp +++ b/src/USER-OMP/pair_peri_lps_omp.cpp @@ -26,15 +26,18 @@ #include "modify.h" #include "neighbor.h" #include "neigh_list.h" 
+#include "math_const.h" using namespace LAMMPS_NS; +using namespace MathConst; /* ---------------------------------------------------------------------- */ PairPeriLPSOMP::PairPeriLPSOMP(LAMMPS *lmp) : - PairPeriLPS(lmp), ThrOMP(lmp, PAIR) + PairPeriLPS(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; + fix_name = "PERI_NEIGH_OMP"; } /* ---------------------------------------------------------------------- */ @@ -43,7 +46,6 @@ void PairPeriLPSOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; const int nall = atom->nlocal + atom->nghost; @@ -61,38 +63,34 @@ void PairPeriLPSOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) +void PairPeriLPSOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz; @@ -103,9 +101,10 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; double fxtmp,fytmp,fztmp; double *vfrac = atom->vfrac; @@ -151,7 +150,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; j &= NEIGHMASK; - + delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; @@ -182,7 +181,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) // of the bond-based theory used in PMB model double kshort = (15.0 * 18.0 * bulkmodulus[itype][itype]) / - (3.141592653589793 * cutsq[itype][jtype] * cutsq[itype][jtype]); + (MY_PI * cutsq[itype][jtype] * cutsq[itype][jtype]); rk = (kshort * vfrac[j]) * (dr / cut[itype][jtype]); if (r > 0.0) fpair = -(rk/r); @@ -199,7 +198,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) if (EFLAG) evdwl = 0.5*rk*dr; if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0, - fpair*vfrac[i],delx,dely,delz,tid); + fpair*vfrac[i],delx,dely,delz,thr); } } f[i][0] += fxtmp; @@ -214,7 +213,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) #if defined(_OPENMP) // each thread works on a fixed chunk of atoms. 
const int idelta = 1 + nlocal/comm->nthreads; - iifrom = tid*idelta; + iifrom = thr->get_tid()*idelta; iito = iifrom + idelta; if (iito > nlocal) iito = nlocal; @@ -234,7 +233,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) #endif { // communicate dilatation (theta) of each particle comm->forward_comm_pair(this); - // communicate wighted volume (wvolume) upon every reneighbor + // communicate weighted volume (wvolume) upon every reneighbor if (neighbor->ago == 0) comm->forward_comm_fix(modify->fix[ifix_peri]); } @@ -245,10 +244,8 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) if (EFLAG) { for (i = iifrom; i < iito; i++) { itype = type[i]; - if (eflag_global) - eng_vdwl_thr[tid] += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]); - if (eflag_atom) - eatom_thr[tid][i] += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]); + e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, + 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]), 0.0, thr); } } @@ -332,7 +329,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) omega_plus*(deviatoric_extension * deviatoric_extension) * vfrac[j] * vfrac_scale; if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,0.5*evdwl,0.0, - 0.5*fbond*vfrac[i],delx,dely,delz,tid); + 0.5*fbond*vfrac[i],delx,dely,delz,thr); // find stretch in bond I-J and break if necessary // use s0 from previous timestep diff --git a/src/USER-OMP/pair_peri_lps_omp.h b/src/USER-OMP/pair_peri_lps_omp.h index 2068830ca09666932fbd1307460aab0a90c05701..f234a4109814c9fde416069ebb369622e47be35c 100644 --- a/src/USER-OMP/pair_peri_lps_omp.h +++ b/src/USER-OMP/pair_peri_lps_omp.h @@ -43,7 +43,7 @@ class PairPeriLPSOMP : public PairPeriLPS, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_peri_pmb_omp.cpp 
b/src/USER-OMP/pair_peri_pmb_omp.cpp index 4e46d142d9be6375ad7b97c4c1ca009408be11de..96e991bab6eb22ef59f769e7da62b501c63bee90 100644 --- a/src/USER-OMP/pair_peri_pmb_omp.cpp +++ b/src/USER-OMP/pair_peri_pmb_omp.cpp @@ -32,9 +32,10 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairPeriPMBOMP::PairPeriPMBOMP(LAMMPS *lmp) : - PairPeriPMB(lmp), ThrOMP(lmp, PAIR) + PairPeriPMB(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; + fix_name = "PERI_NEIGH_OMP"; } /* ---------------------------------------------------------------------- */ @@ -43,7 +44,6 @@ void PairPeriPMBOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -59,38 +59,34 @@ void PairPeriPMBOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid) +void PairPeriPMBOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz; @@ -101,9 +97,10 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; double fxtmp,fytmp,fztmp; double *vfrac = atom->vfrac; @@ -148,10 +145,11 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid) for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; j &= NEIGHMASK; - + delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; delx0 = xtmp0 - x0[j][0]; dely0 = ytmp0 - x0[j][1]; @@ -190,7 +188,7 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid) if (EFLAG) evdwl = 0.5*rk*dr; if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0, - fpair*vfrac[i],delx,dely,delz,tid); + fpair*vfrac[i],delx,dely,delz,thr); } } f[i][0] += fxtmp; @@ -205,7 +203,7 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid) #if defined(_OPENMP) // each thread works on a fixed chunk of atoms. 
const int idelta = 1 + nlocal/comm->nthreads; - iifrom = tid*idelta; + iifrom = thr->get_tid()*idelta; iito = iifrom + idelta; if (iito > nlocal) iito = nlocal; @@ -278,7 +276,7 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid) if (EFLAG) evdwl = 0.5*rk*dr; if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,0.5*evdwl,0.0, - 0.5*fbond*vfrac[i],delx,dely,delz,tid); + 0.5*fbond*vfrac[i],delx,dely,delz,thr); // find stretch in bond I-J and break if necessary // use s0 from previous timestep @@ -291,13 +289,14 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid) s0_new[i] = s00[itype][jtype] - (alpha[itype][jtype] * stretch); else s0_new[i] = MAX(s0_new[i],s00[itype][jtype] - (alpha[itype][jtype] * stretch)); + first = false; } } sync_threads(); - // store new s0 + // store new s0 (in parallel) for (i = iifrom; i < iito; i++) s0[i] = s0_new[i]; } diff --git a/src/USER-OMP/pair_peri_pmb_omp.h b/src/USER-OMP/pair_peri_pmb_omp.h index 9940e5ed15dafe63befa77636e6ddeab0be4a03f..8a7fc091d9aa7df290ee1246f0928ae9cf0803de 100644 --- a/src/USER-OMP/pair_peri_pmb_omp.h +++ b/src/USER-OMP/pair_peri_pmb_omp.h @@ -39,7 +39,7 @@ class PairPeriPMBOMP : public PairPeriPMB, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_resquared_omp.cpp b/src/USER-OMP/pair_resquared_omp.cpp index 4870553050b4b7f4186932b8ddd7a881f5ba69d6..cef5aaefc554a2f16d15109d1a6c96e531fd8589 100644 --- a/src/USER-OMP/pair_resquared_omp.cpp +++ b/src/USER-OMP/pair_resquared_omp.cpp @@ -27,7 +27,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairRESquaredOMP::PairRESquaredOMP(LAMMPS *lmp) : - PairRESquared(lmp), ThrOMP(lmp, PAIR) + PairRESquared(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -38,7 +38,6 @@ void PairRESquaredOMP::compute(int eflag, int 
vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -46,40 +45,34 @@ void PairRESquaredOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, **torque; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - torque = atom->torque + tid*nall; + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid); - else eval<1,1,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid); - else eval<1,0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid); - else eval<0,0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces and torques into global arrays. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); - data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int tid) +void PairRESquaredOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj; @@ -87,11 +80,12 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int int *ilist,*jlist,*numneigh,**firstneigh; RE2Vars wi,wj; - double **x = atom->x; - int *ellipsoid = atom->ellipsoid; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const tor = thr->get_torque(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp; @@ -105,6 +99,7 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int i = ilist[ii]; itype = type[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; // not a LJ sphere @@ -129,6 +124,8 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int // compute if less than cutoff if (rsq < cutsq[itype][jtype]) { + fforce[0] = fforce[1] = fforce[2] = 0.0; + switch (form[itype][jtype]) { case SPHERE_SPHERE: @@ -157,17 +154,17 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int case ELLIPSE_SPHERE: one_eng = resquared_lj(i,j,wi,r12,rsq,fforce,ttor,true); - tor[i][0] += ttor[0]*factor_lj; - tor[i][1] += ttor[1]*factor_lj; - tor[i][2] += ttor[2]*factor_lj; + t1tmp += ttor[0]*factor_lj; + t2tmp += ttor[1]*factor_lj; + t3tmp += ttor[2]*factor_lj; break; default: precompute_i(j,wj); one_eng = resquared_analytic(i,j,wi,wj,r12,rsq,fforce,ttor,rtor); - tor[i][0] += ttor[0]*factor_lj; - tor[i][1] += ttor[1]*factor_lj; - 
tor[i][2] += ttor[2]*factor_lj; + t1tmp += ttor[0]*factor_lj; + t2tmp += ttor[1]*factor_lj; + t3tmp += ttor[2]*factor_lj; if (NEWTON_PAIR || j < nlocal) { tor[j][0] += rtor[0]*factor_lj; tor[j][1] += rtor[1]*factor_lj; @@ -179,9 +176,9 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int fforce[0] *= factor_lj; fforce[1] *= factor_lj; fforce[2] *= factor_lj; - f[i][0] += fforce[0]; - f[i][1] += fforce[1]; - f[i][2] += fforce[2]; + fxtmp += fforce[0]; + fytmp += fforce[1]; + fztmp += fforce[2]; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= fforce[0]; @@ -193,9 +190,15 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, evdwl,0.0,fforce[0],fforce[1],fforce[2], - -r12[0],-r12[1],-r12[2],tid); + -r12[0],-r12[1],-r12[2],thr); } } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + tor[i][0] += t1tmp; + tor[i][1] += t2tmp; + tor[i][2] += t3tmp; } } diff --git a/src/USER-OMP/pair_resquared_omp.h b/src/USER-OMP/pair_resquared_omp.h index 2a50bb6dd0bc0a8c4e4305304a7ed72f5eb722cb..53a6e2e28f059e7a76449c325bdce7601a5a574c 100644 --- a/src/USER-OMP/pair_resquared_omp.h +++ b/src/USER-OMP/pair_resquared_omp.h @@ -39,7 +39,7 @@ class PairRESquaredOMP : public PairRESquared, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, double **torque, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_soft_omp.cpp b/src/USER-OMP/pair_soft_omp.cpp index 9f9673a28b5f7a38e90e3ce7127d5c5bd020a62f..cbc1c9f7fd6daf7d4571cda4d7d5e613754b6727 100644 --- a/src/USER-OMP/pair_soft_omp.cpp +++ b/src/USER-OMP/pair_soft_omp.cpp @@ -29,7 +29,7 @@ using namespace MathConst; /* ---------------------------------------------------------------------- */ PairSoftOMP::PairSoftOMP(LAMMPS *lmp) : - PairSoft(lmp), ThrOMP(lmp, PAIR) + PairSoft(lmp), ThrOMP(lmp, THR_PAIR) { 
respa_enable = 0; } @@ -40,7 +40,6 @@ void PairSoftOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -48,38 +47,34 @@ void PairSoftOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairSoftOMP::eval(double **f, int iifrom, int iito, int tid) +void PairSoftOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -88,10 +83,11 @@ void PairSoftOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -141,8 +137,8 @@ void PairSoftOMP::eval(double **f, int iifrom, int iito, int tid) if (EFLAG) evdwl = factor_lj * prefactor[itype][jtype] * (1.0+cos(arg*r)); - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_soft_omp.h b/src/USER-OMP/pair_soft_omp.h index 840d874601a88296d53b087156a1861cb6b334bc..1698089521794aee7dccc30e32d7ee85b77d38d4 100644 --- a/src/USER-OMP/pair_soft_omp.h +++ b/src/USER-OMP/pair_soft_omp.h @@ -39,7 +39,7 @@ class PairSoftOMP : public PairSoft, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_sw_omp.cpp b/src/USER-OMP/pair_sw_omp.cpp index 5d7f1a60d75fae86908ff12ea2ce2b8c2a0d6070..12aceed1d45d1c28b197e4688adb6edf9d68c9eb 100644 --- a/src/USER-OMP/pair_sw_omp.cpp +++ b/src/USER-OMP/pair_sw_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* 
---------------------------------------------------------------------- */ PairSWOMP::PairSWOMP(LAMMPS *lmp) : - PairSW(lmp), ThrOMP(lmp, PAIR) + PairSW(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairSWOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,33 +43,29 @@ void PairSWOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - eval<1,1>(f, ifrom, ito, tid); + eval<1,1>(ifrom, ito, thr); } else { - eval<1,0>(f, ifrom, ito, tid); + eval<1,0>(ifrom, ito, thr); } - } else eval<0,0>(f, ifrom, ito, tid); + } else eval<0,0>(ifrom, ito, thr); - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG> -void PairSWOMP::eval(double **f, int iifrom, int iito, int tid) +void PairSWOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,k,ii,jj,kk,jnum,jnumm1,itag,jtag; int itype,jtype,ktype,ijparam,ikparam,ijkparam; @@ -81,10 +76,11 @@ void PairSWOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *tag = atom->tag; - int *type = atom->type; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const tag = atom->tag; + const int * const type = atom->type; + const int nlocal = atom->nlocal; ilist = list->ilist; numneigh = list->numneigh; @@ -92,7 +88,7 @@ void PairSWOMP::eval(double **f, int iifrom, int iito, int tid) double fxtmp,fytmp,fztmp; - // loop over neighbors of my atoms + // loop over full neighbor list of my atoms for (ii = iifrom; ii < iito; ++ii) { @@ -144,7 +140,7 @@ void PairSWOMP::eval(double **f, int iifrom, int iito, int tid) f[j][2] -= delz*fpair; if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } jnumm1 = jnum - 1; @@ -189,7 +185,7 @@ void PairSWOMP::eval(double **f, int iifrom, int iito, int tid) f[k][1] += fk[1]; f[k][2] += fk[2]; - if (EVFLAG) ev_tally3_thr(this,i,j,k,evdwl,0.0,fj,fk,delr1,delr2,tid); + if (EVFLAG) ev_tally3_thr(this,i,j,k,evdwl,0.0,fj,fk,delr1,delr2,thr); } f[j][0] += fjxtmp; f[j][1] += fjytmp; diff --git a/src/USER-OMP/pair_sw_omp.h b/src/USER-OMP/pair_sw_omp.h index 40052d7d4197401677ead18459089f022d39c364..c4af86007a3bd76648bc698d22be535fa4dc8d28 100644 --- a/src/USER-OMP/pair_sw_omp.h +++ b/src/USER-OMP/pair_sw_omp.h @@ -39,7 +39,7 @@ class PairSWOMP : public PairSW, public ThrOMP { private: template <int EVFLAG, int EFLAG> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, 
int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_table_omp.cpp b/src/USER-OMP/pair_table_omp.cpp index 6b14d4c9813e61dae6452489017f0b1be62440cd..e8d63e590d35f98cde3171cb5246f4f13a07216e 100644 --- a/src/USER-OMP/pair_table_omp.cpp +++ b/src/USER-OMP/pair_table_omp.cpp @@ -26,7 +26,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairTableOMP::PairTableOMP(LAMMPS *lmp) : - PairTable(lmp), ThrOMP(lmp, PAIR) + PairTable(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -37,7 +37,6 @@ void PairTableOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -45,38 +44,34 @@ void PairTableOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairTableOMP::eval(double **f, int iifrom, int iito, int tid) +void PairTableOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype,itable; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -89,10 +84,11 @@ void PairTableOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -122,7 +118,7 @@ void PairTableOMP::eval(double **f, int iifrom, int iito, int tid) delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; - + if (rsq < cutsq[itype][jtype]) { tb = &tables[tabindex[itype][jtype]]; if (rsq < tb->innersq) @@ -181,7 +177,7 @@ void PairTableOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } } diff --git a/src/USER-OMP/pair_table_omp.h b/src/USER-OMP/pair_table_omp.h index 6fd1ce74a4effea9a7b9ce4dc54278a39c96b5a2..974149b9ac35cc92776126a6d4a9a2be9d1274d3 100644 --- a/src/USER-OMP/pair_table_omp.h +++ b/src/USER-OMP/pair_table_omp.h @@ -39,7 +39,7 @@ class PairTableOMP : public PairTable, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const 
thr); }; } diff --git a/src/USER-OMP/pair_tersoff_omp.cpp b/src/USER-OMP/pair_tersoff_omp.cpp index f59a8488f7a36117c876093be7dacce54ab0a7e4..fdbcd48292d208b9b2585189741052fe3c45dce6 100644 --- a/src/USER-OMP/pair_tersoff_omp.cpp +++ b/src/USER-OMP/pair_tersoff_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairTersoffOMP::PairTersoffOMP(LAMMPS *lmp) : - PairTersoff(lmp), ThrOMP(lmp, PAIR) + PairTersoff(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairTersoffOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = vflag_atom = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,35 +43,31 @@ void PairTersoffOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (vflag_atom) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (vflag_atom) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (vflag_atom) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (vflag_atom) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } - } else eval<0,0,0>(f, ifrom, ito, tid); + } else eval<0,0,0>(ifrom, ito, thr); - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int VFLAG_ATOM> -void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid) +void PairTersoffOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,k,ii,jj,kk,jnum; int itag,jtag,itype,jtype,ktype,iparam_ij,iparam_ijk; @@ -84,10 +79,11 @@ void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *tag = atom->tag; - int *type = atom->type; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const tag = atom->tag; + const int * const type = atom->type; + const int nlocal = atom->nlocal; ilist = list->ilist; numneigh = list->numneigh; @@ -147,7 +143,7 @@ void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid) f[j][2] -= delz*fpair; if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } // three-body interactions @@ -199,7 +195,7 @@ void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid) fjztmp -= delr1[2]*fpair; if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,evdwl,0.0, - -fpair,-delr1[0],-delr1[1],-delr1[2],tid); + -fpair,-delr1[0],-delr1[1],-delr1[2],thr); // attractive term via loop over k @@ -229,7 +225,7 @@ void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid) f[k][1] += fk[1]; f[k][2] += fk[2]; - if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,tid); + if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,thr); } f[j][0] += fjxtmp; f[j][1] += fjytmp; diff --git a/src/USER-OMP/pair_tersoff_omp.h b/src/USER-OMP/pair_tersoff_omp.h index 5e5dc066d2f50c15a3bfc522f325a0010a38dbe2..97c20548aff2356f00d111487a26a90d5f8b48eb 100644 --- a/src/USER-OMP/pair_tersoff_omp.h +++ b/src/USER-OMP/pair_tersoff_omp.h @@ -34,7 +34,7 @@ class PairTersoffOMP : public PairTersoff, public 
ThrOMP { private: template <int EVFLAG, int EFLAG, int VFLAG_ATOM> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_yukawa_colloid_omp.cpp b/src/USER-OMP/pair_yukawa_colloid_omp.cpp index 710ad9df187be033641661d2005e0cce50d2725a..6caa13ee90ec4880a4cde0c1aedbc204413a4bdf 100644 --- a/src/USER-OMP/pair_yukawa_colloid_omp.cpp +++ b/src/USER-OMP/pair_yukawa_colloid_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairYukawaColloidOMP::PairYukawaColloidOMP(LAMMPS *lmp) : - PairYukawaColloid(lmp), ThrOMP(lmp, PAIR) + PairYukawaColloid(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairYukawaColloidOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairYukawaColloidOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if 
(force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairYukawaColloidOMP::eval(double **f, int iifrom, int iito, int tid) +void PairYukawaColloidOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair,radi,radj; @@ -84,11 +79,12 @@ void PairYukawaColloidOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - double *radius = atom->radius; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const radius = atom->radius; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -144,7 +140,7 @@ void PairYukawaColloidOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor; } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_yukawa_colloid_omp.h b/src/USER-OMP/pair_yukawa_colloid_omp.h index 9483cd15c1becdbe5a25c96ba13ad50bd0cb4bfb..c424e9eff354c7974c8f32230e6000525c50eb3e 100644 --- a/src/USER-OMP/pair_yukawa_colloid_omp.h +++ b/src/USER-OMP/pair_yukawa_colloid_omp.h @@ -39,7 +39,7 @@ class PairYukawaColloidOMP : public PairYukawaColloid, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double 
**f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_yukawa_omp.cpp b/src/USER-OMP/pair_yukawa_omp.cpp index 1380e2239c1357a5484eaf2b3302f1ac3e99f246..210c7fcc1eaf2338db67aa040c0ceec5a644823b 100644 --- a/src/USER-OMP/pair_yukawa_omp.cpp +++ b/src/USER-OMP/pair_yukawa_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairYukawaOMP::PairYukawaOMP(LAMMPS *lmp) : - PairYukawa(lmp), ThrOMP(lmp, PAIR) + PairYukawa(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairYukawaOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairYukawaOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairYukawaOMP::eval(double **f, int iifrom, int iito, int tid) +void PairYukawaOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -84,10 +79,11 @@ void PairYukawaOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -141,8 +137,8 @@ void PairYukawaOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_yukawa_omp.h b/src/USER-OMP/pair_yukawa_omp.h index e363ac6d1742d53d4747d364664be3b1f309598f..99abc569fa207274d9d8ea8ce2261d8f2d8b98b4 100644 --- a/src/USER-OMP/pair_yukawa_omp.h +++ b/src/USER-OMP/pair_yukawa_omp.h @@ -39,7 +39,7 @@ class PairYukawaOMP : public PairYukawa, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/thr_omp.cpp b/src/USER-OMP/thr_omp.cpp index 37ce1f198b011f6392574ee9460dc90f7e75e7d4..19537868e5d491f52f445c00a44ba4ad8f3ae30a 
100644 --- a/src/USER-OMP/thr_omp.cpp +++ b/src/USER-OMP/thr_omp.cpp @@ -16,213 +16,481 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ -#include "thr_omp.h" - -#include "memory.h" - #include "atom.h" #include "comm.h" +#include "error.h" #include "force.h" +#include "memory.h" +#include "modify.h" +#include "neighbor.h" + +#include "thr_omp.h" #include "pair.h" +#include "bond.h" +#include "angle.h" #include "dihedral.h" - -#if defined(_OPENMP) -#include <omp.h> -#endif +#include "improper.h" +#include "kspace.h" #include "math_const.h" +#include <string.h> + using namespace LAMMPS_NS; using namespace MathConst; /* ---------------------------------------------------------------------- */ -ThrOMP::ThrOMP(LAMMPS *ptr, int style) : thr_style(style), lmp(ptr) +ThrOMP::ThrOMP(LAMMPS *ptr, int style) : lmp(ptr), fix(NULL), thr_style(style) { - // initialize fixed size per thread storage - eng_vdwl_thr = eng_coul_thr = eng_bond_thr = NULL; - virial_thr = NULL; - - lmp->memory->create(eng_vdwl_thr,lmp->comm->nthreads,"thr_omp:eng_vdwl_thr"); - lmp->memory->create(eng_coul_thr,lmp->comm->nthreads,"thr_omp:eng_coul_thr"); - lmp->memory->create(eng_bond_thr,lmp->comm->nthreads,"thr_omp:eng_bond_thr"); - lmp->memory->create(virial_thr,lmp->comm->nthreads,6,"thr_omp:virial_thr"); - - // variable size per thread, per atom storage - // the actually allocation happens via memory->grow() in ev_steup_thr() - maxeatom_thr = maxvatom_thr = 0; - evflag_global = evflag_atom = 0; - eatom_thr = NULL; - vatom_thr = NULL; + // register fix omp with this class + int ifix = lmp->modify->find_fix("package_omp"); + if (ifix < 0) + lmp->error->all(FLERR,"The 'package omp' command is required for /omp styles"); + fix = static_cast<FixOMP *>(lmp->modify->fix[ifix]); } /* ---------------------------------------------------------------------- */ ThrOMP::~ThrOMP() { - lmp->memory->destroy(eng_vdwl_thr); - 
lmp->memory->destroy(eng_coul_thr); - lmp->memory->destroy(eng_bond_thr); - lmp->memory->destroy(virial_thr); - lmp->memory->destroy(eatom_thr); - lmp->memory->destroy(vatom_thr); + // nothing to do? } -/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + Hook up per thread per atom arrays into the tally infrastructure + ---------------------------------------------------------------------- */ -void ThrOMP::ev_setup_acc_thr(int ntotal, int eflag_global, int vflag_global, - int eflag_atom, int vflag_atom, int nthreads) +void ThrOMP::ev_setup_thr(int eflag, int vflag, int nall, double *eatom, + double **vatom, ThrData *thr) { - int t,i; - - evflag_global = (eflag_global || vflag_global); - evflag_atom = (eflag_atom || vflag_atom); + const int tid = thr->get_tid(); - for (t = 0; t < nthreads; ++t) { + if (thr_style & THR_PAIR) { + if (eflag & 2) { + thr->eatom_pair = eatom + tid*nall; + memset(&(thr->eatom_pair[0]),0,nall*sizeof(double)); + } + if (vflag & 4) { + thr->vatom_pair = vatom + tid*nall; + memset(&(thr->vatom_pair[0][0]),0,nall*6*sizeof(double)); + } + } + + if (thr_style & THR_BOND) { + if (eflag & 2) { + thr->eatom_bond = eatom + tid*nall; + memset(&(thr->eatom_bond[0]),0,nall*sizeof(double)); + } + if (vflag & 4) { + thr->vatom_bond = vatom + tid*nall; + memset(&(thr->vatom_bond[0][0]),0,nall*6*sizeof(double)); + } + } - if (eflag_global) - eng_vdwl_thr[t] = eng_coul_thr[t] = eng_bond_thr[t] = 0.0; + if (thr_style & THR_ANGLE) { + if (eflag & 2) { + thr->eatom_angle = eatom + tid*nall; + memset(&(thr->eatom_angle[0]),0,nall*sizeof(double)); + } + if (vflag & 4) { + thr->vatom_angle = vatom + tid*nall; + memset(&(thr->vatom_angle[0][0]),0,nall*6*sizeof(double)); + } + } - if (vflag_global) - for (i = 0; i < 6; ++i) - virial_thr[t][i] = 0.0; + if (thr_style & THR_DIHEDRAL) { + if (eflag & 2) { + thr->eatom_dihed = eatom + tid*nall; + 
memset(&(thr->eatom_dihed[0]),0,nall*sizeof(double)); + } + if (vflag & 4) { + thr->vatom_dihed = vatom + tid*nall; + memset(&(thr->vatom_dihed[0][0]),0,nall*6*sizeof(double)); + } + } - if (eflag_atom) - for (i = 0; i < ntotal; ++i) - eatom_thr[t][i] = 0.0; - - if (vflag_atom) - for (i = 0; i < ntotal; ++i) { - vatom_thr[t][i][0] = 0.0; - vatom_thr[t][i][1] = 0.0; - vatom_thr[t][i][2] = 0.0; - vatom_thr[t][i][3] = 0.0; - vatom_thr[t][i][4] = 0.0; - vatom_thr[t][i][5] = 0.0; - } + if (thr_style & THR_IMPROPER) { + if (eflag & 2) { + thr->eatom_imprp = eatom + tid*nall; + memset(&(thr->eatom_imprp[0]),0,nall*sizeof(double)); + } + if (vflag & 4) { + thr->vatom_imprp = vatom + tid*nall; + memset(&(thr->vatom_imprp[0][0]),0,nall*6*sizeof(double)); + } } -} -/* ---------------------------------------------------------------------- */ +#if 0 /* not supported (yet) */ + if (thr_style & THR_KSPACE) { + if (eflag & 2) { + thr->eatom_kspce = eatom + tid*nall; + memset(&(thr->eatom_kspce[0]),0,nall*sizeof(double)); + } + if (vflag & 4) { + thr->vatom_kspce = vatom + tid*nall; + memset(&(thr->vatom_kspce[0][0]),0,nall*6*sizeof(double)); + } + } +#endif +} -void ThrOMP::ev_setup_thr(Dihedral *dihed) +/* ---------------------------------------------------------------------- + Reduce per thread data into the regular structures + Reduction of global properties is serialized with a "critical" + directive, so that only one thread at a time will access the + global variables. Since we are not synchronized, this should + come with little overhead. The reduction of per-atom properties + in contrast is parallelized over threads in the same way as forces. 
+ ---------------------------------------------------------------------- */ + +void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag, + ThrData *const thr, const int nproxy) { - int nthreads = lmp->comm->nthreads; + const int nlocal = lmp->atom->nlocal; + const int nghost = lmp->atom->nghost; + const int nall = nlocal + nghost; + const int nfirst = lmp->atom->nfirst; + const int nthreads = lmp->comm->nthreads; + const int evflag = eflag | vflag; + + const int tid = thr->get_tid(); + double **f = lmp->atom->f; + double **x = lmp->atom->x; + + switch (thr_style) { - // reallocate per-atom arrays if necessary - if (dihed->eflag_atom && lmp->atom->nmax > maxeatom_thr) { - maxeatom_thr = lmp->atom->nmax; - lmp->memory->grow(eatom_thr,nthreads,maxeatom_thr,"thr_omp:eatom_thr"); + case THR_PAIR: { + Pair * const pair = lmp->force->pair; + + if (pair->vflag_fdotr) { + if (lmp->neighbor->includegroup == 0) + thr->virial_fdotr_compute(x, nlocal, nghost, -1); + else + thr->virial_fdotr_compute(x, nlocal, nghost, nfirst); + } + + if (evflag) { +#if defined(_OPENMP) +#pragma omp critical +#endif + { + if (eflag & 1) { + pair->eng_vdwl += thr->eng_vdwl; + pair->eng_coul += thr->eng_coul; + thr->eng_vdwl = 0.0; + thr->eng_coul = 0.0; + } + if (vflag & 3) + for (int i=0; i < 6; ++i) { + pair->virial[i] += thr->virial_pair[i]; + thr->virial_pair[i] = 0.0; + } + } + if (eflag & 2) { + sync_threads(); + data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + sync_threads(); + data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid); + } + } } - if (dihed->vflag_atom && lmp->atom->nmax > maxvatom_thr) { - maxvatom_thr = lmp->atom->nmax; - lmp->memory->grow(vatom_thr,nthreads,maxeatom_thr,6,"thr_omp:vatom_thr"); + break; + + case THR_PAIR|THR_PROXY: { + Pair * const pair = lmp->force->pair; + + if (tid >= nproxy && pair->vflag_fdotr) { + if (lmp->neighbor->includegroup == 0) + thr->virial_fdotr_compute(x, nlocal, nghost, -1); + else + 
thr->virial_fdotr_compute(x, nlocal, nghost, nfirst); + } + + if (evflag) { +#if defined(_OPENMP) +#pragma omp critical +#endif + { + if (tid < nproxy) { + // nothing to do for kspace? + if (vflag & 3) + for (int i=0; i < 6; ++i) { + thr->virial_pair[i] = 0.0; + } + } else { + if (eflag & 1) { + pair->eng_vdwl += thr->eng_vdwl; + pair->eng_coul += thr->eng_coul; + thr->eng_vdwl = 0.0; + thr->eng_coul = 0.0; + } + if (vflag & 3) + for (int i=0; i < 6; ++i) { + pair->virial[i] += thr->virial_pair[i]; + thr->virial_pair[i] = 0.0; + } + } + } + if (eflag & 2) { + sync_threads(); + data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + sync_threads(); + data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid); + } + } } + break; - int ntotal = (lmp->force->newton_bond) ? - (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal; + case THR_BOND: - // set up per thread accumulators - ev_setup_acc_thr(ntotal, dihed->eflag_global, dihed->vflag_global, - dihed->eflag_atom, dihed->vflag_atom, nthreads); -} + if (evflag) { + Bond * const bond = lmp->force->bond; +#if defined(_OPENMP) +#pragma omp critical +#endif + { + bond->energy += thr->eng_bond; + for (int i=0; i < 6; ++i) + bond->virial[i] += thr->virial_bond[i]; + } + if (eflag & 2) { + sync_threads(); + data_reduce_thr(&(bond->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + sync_threads(); + data_reduce_thr(&(bond->vatom[0][0]), nall, nthreads, 6, tid); + } + } + break; -/* ---------------------------------------------------------------------- */ + case THR_ANGLE: -void ThrOMP::ev_setup_thr(Pair *pair) -{ - int nthreads = lmp->comm->nthreads; + if (evflag) { + Angle * const angle = lmp->force->angle; +#if defined(_OPENMP) +#pragma omp critical +#endif + { + angle->energy += thr->eng_angle; + for (int i=0; i < 6; ++i) + angle->virial[i] += thr->virial_angle[i]; + } + if (eflag & 2) { + sync_threads(); + data_reduce_thr(&(angle->eatom[0]), nall, nthreads, 1, tid); + } + 
if (vflag & 4) { + sync_threads(); + data_reduce_thr(&(angle->vatom[0][0]), nall, nthreads, 6, tid); + } + } + break; - // reallocate per-atom arrays if necessary - if (pair->eflag_atom && lmp->atom->nmax > maxeatom_thr) { - maxeatom_thr = lmp->atom->nmax; - lmp->memory->grow(eatom_thr,nthreads,maxeatom_thr,"thr_omp:eatom_thr"); - } - if (pair->vflag_atom && lmp->atom->nmax > maxvatom_thr) { - maxvatom_thr = lmp->atom->nmax; - lmp->memory->grow(vatom_thr,nthreads,maxeatom_thr,6,"thr_omp:vatom_thr"); - } + case THR_DIHEDRAL: + + if (evflag) { + Dihedral * const dihedral = lmp->force->dihedral; +#if defined(_OPENMP) +#pragma omp critical +#endif + { + dihedral->energy += thr->eng_dihed; + for (int i=0; i < 6; ++i) + dihedral->virial[i] += thr->virial_dihed[i]; + } + if (eflag & 2) { + sync_threads(); + data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + sync_threads(); + data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid); + } + } + break; - int ntotal = (lmp->force->newton) ? 
- (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal; + case THR_DIHEDRAL|THR_CHARMM: // special case for CHARMM dihedrals + + if (evflag) { + Dihedral * const dihedral = lmp->force->dihedral; + Pair * const pair = lmp->force->pair; +#if defined(_OPENMP) +#pragma omp critical +#endif + { + if (eflag & 1) { + dihedral->energy += thr->eng_dihed; + pair->eng_vdwl += thr->eng_vdwl; + pair->eng_coul += thr->eng_coul; + } + + if (vflag & 3) { + for (int i=0; i < 6; ++i) { + dihedral->virial[i] += thr->virial_dihed[i]; + pair->virial[i] += thr->virial_pair[i]; + } + } + } + if (eflag & 2) { + sync_threads(); + data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid); + data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + sync_threads(); + data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid); + data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid); + } + } + break; + + case THR_IMPROPER: + + if (evflag) { + Improper *improper = lmp->force->improper; +#if defined(_OPENMP) +#pragma omp critical +#endif + { + improper->energy += thr->eng_imprp; + for (int i=0; i < 6; ++i) + improper->virial[i] += thr->virial_imprp[i]; + } + if (eflag & 2) { + sync_threads(); + data_reduce_thr(&(improper->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + sync_threads(); + data_reduce_thr(&(improper->vatom[0][0]), nall, nthreads, 6, tid); + } + } + break; + + case THR_KSPACE|THR_PROXY: // fallthrough + case THR_KSPACE: + // nothing to do (for now) +#if 0 + if (evflag) { + KSpace *kspace = lmp->force->kspace; +#if defined(_OPENMP) +#pragma omp critical +#endif + { + kspace->energy += thr->eng_kspce; + for (int i=0; i < 6; ++i) + kspace->virial[i] += thr->virial_kspce[i]; + } + if (eflag & 2) { + sync_threads(); + data_reduce_thr(&(kspace->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + sync_threads(); + data_reduce_thr(&(kspace->vatom[0][0]), nall, nthreads, 6, tid); + } + } +#endif + break; - // set up 
per thread accumulators - ev_setup_acc_thr(ntotal, pair->eflag_global, pair->vflag_global, - pair->eflag_atom, pair->vflag_atom, nthreads); + default: + printf("tid:%d unhandled thr_style case %d\n", tid, thr_style); + break; + } + + if (style == fix->last_omp_style) { + sync_threads(); + data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid); + if (lmp->atom->torque) + data_reduce_thr(&(lmp->atom->torque[0][0]), nall, nthreads, 3, tid); + } } /* ---------------------------------------------------------------------- - reduce the per thread accumulated E/V data into the canonical accumulators. + tally eng_vdwl and eng_coul into per thread global and per-atom accumulators ------------------------------------------------------------------------- */ -void ThrOMP::ev_reduce_thr(Dihedral *dihed) + +void ThrOMP::e_tally_thr(Pair * const pair, const int i, const int j, + const int nlocal, const int newton_pair, + const double evdwl, const double ecoul, ThrData * const thr) { - int nthreads = lmp->comm->nthreads; - int ntotal = (lmp->force->newton_bond) ? 
- (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal; - - for (int n = 0; n < nthreads; ++n) { - dihed->energy += eng_bond_thr[n]; - if (dihed->vflag_either) { - dihed->virial[0] += virial_thr[n][0]; - dihed->virial[1] += virial_thr[n][1]; - dihed->virial[2] += virial_thr[n][2]; - dihed->virial[3] += virial_thr[n][3]; - dihed->virial[4] += virial_thr[n][4]; - dihed->virial[5] += virial_thr[n][5]; - if (dihed->vflag_atom) { - for (int i = 0; i < ntotal; ++i) { - dihed->vatom[i][0] += vatom_thr[n][i][0]; - dihed->vatom[i][1] += vatom_thr[n][i][1]; - dihed->vatom[i][2] += vatom_thr[n][i][2]; - dihed->vatom[i][3] += vatom_thr[n][i][3]; - dihed->vatom[i][4] += vatom_thr[n][i][4]; - dihed->vatom[i][5] += vatom_thr[n][i][5]; - } + if (pair->eflag_global) { + if (newton_pair) { + thr->eng_vdwl += evdwl; + thr->eng_coul += ecoul; + } else { + const double evdwlhalf = 0.5*evdwl; + const double ecoulhalf = 0.5*ecoul; + if (i < nlocal) { + thr->eng_vdwl += evdwlhalf; + thr->eng_coul += ecoulhalf; } - } - if (dihed->eflag_atom) { - for (int i = 0; i < ntotal; ++i) { - dihed->eatom[i] += eatom_thr[n][i]; + if (j < nlocal) { + thr->eng_vdwl += evdwlhalf; + thr->eng_coul += ecoulhalf; } } } + if (pair->eflag_atom) { + const double epairhalf = 0.5 * (evdwl + ecoul); + if (newton_pair || i < nlocal) thr->eatom_pair[i] += epairhalf; + if (newton_pair || j < nlocal) thr->eatom_pair[j] += epairhalf; + } +} + +/* helper functions */ +static void v_tally(double * const vout, const double * const vin) +{ + vout[0] += vin[0]; + vout[1] += vin[1]; + vout[2] += vin[2]; + vout[3] += vin[3]; + vout[4] += vin[4]; + vout[5] += vin[5]; +} + +static void v_tally(double * const vout, const double scale, const double * const vin) +{ + vout[0] += scale*vin[0]; + vout[1] += scale*vin[1]; + vout[2] += scale*vin[2]; + vout[3] += scale*vin[3]; + vout[4] += scale*vin[4]; + vout[5] += scale*vin[5]; } /* ---------------------------------------------------------------------- - reduce the per thread 
accumulated E/V data into the canonical accumulators. + tally virial into per thread global and per-atom accumulators ------------------------------------------------------------------------- */ -void ThrOMP::ev_reduce_thr(Pair *pair) +void ThrOMP::v_tally_thr(Pair * const pair, const int i, const int j, + const int nlocal, const int newton_pair, + const double * const v, ThrData * const thr) { - const int nthreads = lmp->comm->nthreads; - const int ntotal = (lmp->force->newton) ? - (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal; - - for (int n = 0; n < nthreads; ++n) { - pair->eng_vdwl += eng_vdwl_thr[n]; - pair->eng_coul += eng_coul_thr[n]; - if (pair->vflag_either) { - pair->virial[0] += virial_thr[n][0]; - pair->virial[1] += virial_thr[n][1]; - pair->virial[2] += virial_thr[n][2]; - pair->virial[3] += virial_thr[n][3]; - pair->virial[4] += virial_thr[n][4]; - pair->virial[5] += virial_thr[n][5]; - if (pair->vflag_atom) { - for (int i = 0; i < ntotal; ++i) { - pair->vatom[i][0] += vatom_thr[n][i][0]; - pair->vatom[i][1] += vatom_thr[n][i][1]; - pair->vatom[i][2] += vatom_thr[n][i][2]; - pair->vatom[i][3] += vatom_thr[n][i][3]; - pair->vatom[i][4] += vatom_thr[n][i][4]; - pair->vatom[i][5] += vatom_thr[n][i][5]; - } - } + if (pair->vflag_global) { + double * const va = thr->virial_pair; + if (newton_pair) { + v_tally(va,v); + } else { + if (i < nlocal) v_tally(va,0.5,v); + if (j < nlocal) v_tally(va,0.5,v); } - if (pair->eflag_atom) { - for (int i = 0; i < ntotal; ++i) { - pair->eatom[i] += eatom_thr[n][i]; - } + } + + if (pair->vflag_atom) { + if (newton_pair || i < nlocal) { + double * const va = thr->vatom_pair[i]; + v_tally(va,0.5,v); + } + if (newton_pair || j < nlocal) { + double * const va = thr->vatom_pair[j]; + v_tally(va,0.5,v); } } } @@ -232,39 +500,17 @@ void ThrOMP::ev_reduce_thr(Pair *pair) need i < nlocal test since called by bond_quartic and dihedral_charmm ------------------------------------------------------------------------- */ 
-void ThrOMP::ev_tally_thr(Pair *pair, int i, int j, int nlocal, - int newton_pair, double evdwl, double ecoul, - double fpair, double delx, double dely, - double delz, int tid) +void ThrOMP::ev_tally_thr(Pair * const pair, const int i, const int j, const int nlocal, + const int newton_pair, const double evdwl, const double ecoul, + const double fpair, const double delx, const double dely, + const double delz, ThrData * const thr) { - double evdwlhalf,ecoulhalf,epairhalf,v[6]; - if (pair->eflag_either) { - if (pair->eflag_global) { - if (newton_pair) { - eng_vdwl_thr[tid] += evdwl; - eng_coul_thr[tid] += ecoul; - } else { - evdwlhalf = 0.5*evdwl; - ecoulhalf = 0.5*ecoul; - if (i < nlocal) { - eng_vdwl_thr[tid] += evdwlhalf; - eng_coul_thr[tid] += ecoulhalf; - } - if (j < nlocal) { - eng_vdwl_thr[tid] += evdwlhalf; - eng_coul_thr[tid] += ecoulhalf; - } - } - } - if (pair->eflag_atom) { - epairhalf = 0.5 * (evdwl + ecoul); - if (newton_pair || i < nlocal) eatom_thr[tid][i] += epairhalf; - if (newton_pair || j < nlocal) eatom_thr[tid][j] += epairhalf; - } - } + if (pair->eflag_either) + e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr); if (pair->vflag_either) { + double v[6]; v[0] = delx*delx*fpair; v[1] = dely*dely*fpair; v[2] = delz*delz*fpair; @@ -272,52 +518,7 @@ void ThrOMP::ev_tally_thr(Pair *pair, int i, int j, int nlocal, v[4] = delx*delz*fpair; v[5] = dely*delz*fpair; - if (pair->vflag_global) { - if (newton_pair) { - virial_thr[tid][0] += v[0]; - virial_thr[tid][1] += v[1]; - virial_thr[tid][2] += v[2]; - virial_thr[tid][3] += v[3]; - virial_thr[tid][4] += v[4]; - virial_thr[tid][5] += v[5]; - } else { - if (i < nlocal) { - virial_thr[tid][0] += 0.5*v[0]; - virial_thr[tid][1] += 0.5*v[1]; - virial_thr[tid][2] += 0.5*v[2]; - virial_thr[tid][3] += 0.5*v[3]; - virial_thr[tid][4] += 0.5*v[4]; - virial_thr[tid][5] += 0.5*v[5]; - } - if (j < nlocal) { - virial_thr[tid][0] += 0.5*v[0]; - virial_thr[tid][1] += 0.5*v[1]; - virial_thr[tid][2] += 
0.5*v[2]; - virial_thr[tid][3] += 0.5*v[3]; - virial_thr[tid][4] += 0.5*v[4]; - virial_thr[tid][5] += 0.5*v[5]; - } - } - } - - if (pair->vflag_atom) { - if (newton_pair || i < nlocal) { - vatom_thr[tid][i][0] += 0.5*v[0]; - vatom_thr[tid][i][1] += 0.5*v[1]; - vatom_thr[tid][i][2] += 0.5*v[2]; - vatom_thr[tid][i][3] += 0.5*v[3]; - vatom_thr[tid][i][4] += 0.5*v[4]; - vatom_thr[tid][i][5] += 0.5*v[5]; - } - if (newton_pair || j < nlocal) { - vatom_thr[tid][j][0] += 0.5*v[0]; - vatom_thr[tid][j][1] += 0.5*v[1]; - vatom_thr[tid][j][2] += 0.5*v[2]; - vatom_thr[tid][j][3] += 0.5*v[3]; - vatom_thr[tid][j][4] += 0.5*v[4]; - vatom_thr[tid][j][5] += 0.5*v[5]; - } - } + v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr); } } @@ -326,39 +527,19 @@ void ThrOMP::ev_tally_thr(Pair *pair, int i, int j, int nlocal, for virial, have delx,dely,delz and fx,fy,fz ------------------------------------------------------------------------- */ -void ThrOMP::ev_tally_xyz_thr(Pair *pair, int i, int j, int nlocal, - int newton_pair, double evdwl, double ecoul, - double fx, double fy, double fz, - double delx, double dely, double delz, int tid) +void ThrOMP::ev_tally_xyz_thr(Pair * const pair, const int i, const int j, + const int nlocal, const int newton_pair, + const double evdwl, const double ecoul, + const double fx, const double fy, const double fz, + const double delx, const double dely, const double delz, + ThrData * const thr) { - double evdwlhalf,ecoulhalf,epairhalf,v[6]; - if (pair->eflag_either) { - if (pair->eflag_global) { - if (newton_pair) { - eng_vdwl_thr[tid] += evdwl; - eng_coul_thr[tid] += ecoul; - } else { - evdwlhalf = 0.5*evdwl; - ecoulhalf = 0.5*ecoul; - if (i < nlocal) { - eng_vdwl_thr[tid] += evdwlhalf; - eng_coul_thr[tid] += ecoulhalf; - } - if (j < nlocal) { - eng_vdwl_thr[tid] += evdwlhalf; - eng_coul_thr[tid] += ecoulhalf; - } - } - } - if (pair->eflag_atom) { - epairhalf = 0.5 * (evdwl + ecoul); - if (newton_pair || i < nlocal) eatom_thr[tid][i] += epairhalf; - 
if (newton_pair || j < nlocal) eatom_thr[tid][j] += epairhalf; - } - } + if (pair->eflag_either) + e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr); if (pair->vflag_either) { + double v[6]; v[0] = delx*fx; v[1] = dely*fy; v[2] = delz*fz; @@ -366,52 +547,7 @@ void ThrOMP::ev_tally_xyz_thr(Pair *pair, int i, int j, int nlocal, v[4] = delx*fz; v[5] = dely*fz; - if (pair->vflag_global) { - if (newton_pair) { - virial_thr[tid][0] += v[0]; - virial_thr[tid][1] += v[1]; - virial_thr[tid][2] += v[2]; - virial_thr[tid][3] += v[3]; - virial_thr[tid][4] += v[4]; - virial_thr[tid][5] += v[5]; - } else { - if (i < nlocal) { - virial_thr[tid][0] += 0.5*v[0]; - virial_thr[tid][1] += 0.5*v[1]; - virial_thr[tid][2] += 0.5*v[2]; - virial_thr[tid][3] += 0.5*v[3]; - virial_thr[tid][4] += 0.5*v[4]; - virial_thr[tid][5] += 0.5*v[5]; - } - if (j < nlocal) { - virial_thr[tid][0] += 0.5*v[0]; - virial_thr[tid][1] += 0.5*v[1]; - virial_thr[tid][2] += 0.5*v[2]; - virial_thr[tid][3] += 0.5*v[3]; - virial_thr[tid][4] += 0.5*v[4]; - virial_thr[tid][5] += 0.5*v[5]; - } - } - } - - if (pair->vflag_atom) { - if (newton_pair || i < nlocal) { - vatom_thr[tid][i][0] += 0.5*v[0]; - vatom_thr[tid][i][1] += 0.5*v[1]; - vatom_thr[tid][i][2] += 0.5*v[2]; - vatom_thr[tid][i][3] += 0.5*v[3]; - vatom_thr[tid][i][4] += 0.5*v[4]; - vatom_thr[tid][i][5] += 0.5*v[5]; - } - if (newton_pair || j < nlocal) { - vatom_thr[tid][j][0] += 0.5*v[0]; - vatom_thr[tid][j][1] += 0.5*v[1]; - vatom_thr[tid][j][2] += 0.5*v[2]; - vatom_thr[tid][j][3] += 0.5*v[3]; - vatom_thr[tid][j][4] += 0.5*v[4]; - vatom_thr[tid][j][5] += 0.5*v[5]; - } - } + v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr); } } @@ -421,25 +557,28 @@ void ThrOMP::ev_tally_xyz_thr(Pair *pair, int i, int j, int nlocal, virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk ------------------------------------------------------------------------- */ -void ThrOMP::ev_tally3_thr(Pair *pair, int i, int j, int k, double evdwl, 
double ecoul, - double *fj, double *fk, double *drji, double *drki, int tid) +void ThrOMP::ev_tally3_thr(Pair * const pair, const int i, const int j, const int k, + const double evdwl, const double ecoul, + const double * const fj, const double * const fk, + const double * const drji, const double * const drki, + ThrData * const thr) { - double epairthird,v[6]; - if (pair->eflag_either) { if (pair->eflag_global) { - eng_vdwl_thr[tid] += evdwl; - eng_coul_thr[tid] += ecoul; + thr->eng_vdwl += evdwl; + thr->eng_coul += ecoul; } if (pair->eflag_atom) { - epairthird = THIRD * (evdwl + ecoul); - eatom_thr[tid][i] += epairthird; - eatom_thr[tid][j] += epairthird; - eatom_thr[tid][k] += epairthird; + const double epairthird = THIRD * (evdwl + ecoul); + thr->eatom_pair[i] += epairthird; + thr->eatom_pair[j] += epairthird; + thr->eatom_pair[k] += epairthird; } } if (pair->vflag_either) { + double v[6]; + v[0] = drji[0]*fj[0] + drki[0]*fk[0]; v[1] = drji[1]*fj[1] + drki[1]*fk[1]; v[2] = drji[2]*fj[2] + drki[2]*fk[2]; @@ -447,21 +586,12 @@ void ThrOMP::ev_tally3_thr(Pair *pair, int i, int j, int k, double evdwl, double v[4] = drji[0]*fj[2] + drki[0]*fk[2]; v[5] = drji[1]*fj[2] + drki[1]*fk[2]; - if (pair->vflag_global) { - virial_thr[tid][0] += v[0]; - virial_thr[tid][1] += v[1]; - virial_thr[tid][2] += v[2]; - virial_thr[tid][3] += v[3]; - virial_thr[tid][4] += v[4]; - virial_thr[tid][5] += v[5]; - } + if (pair->vflag_global) v_tally(thr->virial_pair,v); if (pair->vflag_atom) { - for (int n=0; n < 6; ++n) { - vatom_thr[tid][i][n] += THIRD*v[n]; - vatom_thr[tid][j][n] += THIRD*v[n]; - vatom_thr[tid][k][n] += THIRD*v[n]; - } + v_tally(thr->vatom_pair[i],THIRD,v); + v_tally(thr->vatom_pair[j],THIRD,v); + v_tally(thr->vatom_pair[k],THIRD,v); } } } @@ -471,20 +601,23 @@ void ThrOMP::ev_tally3_thr(Pair *pair, int i, int j, int k, double evdwl, double called by AIREBO potential, newton_pair is always on ------------------------------------------------------------------------- */ 
-void ThrOMP::ev_tally4_thr(Pair *pair, int i, int j, int k, int m, double evdwl, - double *fi, double *fj, double *fk, - double *drim, double *drjm, double *drkm,int tid) +void ThrOMP::ev_tally4_thr(Pair * const pair, const int i, const int j, + const int k, const int m, const double evdwl, + const double * const fi, const double * const fj, + const double * const fk, const double * const drim, + const double * const drjm, const double * const drkm, + ThrData * const thr) { - double epairfourth,v[6]; + double v[6]; if (pair->eflag_either) { - if (pair->eflag_global) eng_vdwl_thr[tid] += evdwl; + if (pair->eflag_global) thr->eng_vdwl += evdwl; if (pair->eflag_atom) { - epairfourth = 0.25 * evdwl; - eatom_thr[tid][i] += epairfourth; - eatom_thr[tid][j] += epairfourth; - eatom_thr[tid][k] += epairfourth; - eatom_thr[tid][m] += epairfourth; + const double epairfourth = 0.25 * evdwl; + thr->eatom_pair[i] += epairfourth; + thr->eatom_pair[j] += epairfourth; + thr->eatom_pair[k] += epairfourth; + thr->eatom_pair[m] += epairfourth; } } @@ -496,14 +629,10 @@ void ThrOMP::ev_tally4_thr(Pair *pair, int i, int j, int k, int m, double evdwl, v[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]); v[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]); - vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2]; - vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5]; - vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2]; - vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5]; - vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1]; vatom_thr[tid][k][2] += v[2]; - vatom_thr[tid][k][3] += v[3]; vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5]; - vatom_thr[tid][m][0] += v[0]; vatom_thr[tid][m][1] += v[1]; vatom_thr[tid][m][2] += v[2]; - vatom_thr[tid][m][3] += v[3]; vatom_thr[tid][m][4] += v[4]; 
vatom_thr[tid][m][5] += v[5]; + v_tally(thr->vatom_pair[i],v); + v_tally(thr->vatom_pair[j],v); + v_tally(thr->vatom_pair[k],v); + v_tally(thr->vatom_pair[m],v); } } @@ -513,48 +642,248 @@ void ThrOMP::ev_tally4_thr(Pair *pair, int i, int j, int k, int m, double evdwl, changes v values by dividing by n ------------------------------------------------------------------------- */ -void ThrOMP::ev_tally_list_thr(Pair *pair, int n, int *list, double ecoul, double *v, int tid) +void ThrOMP::ev_tally_list_thr(Pair * const pair, const int n, + const int * const list, const double ecoul, + const double * const v, ThrData * const thr) { - int i,j; - if (pair->eflag_either) { - if (pair->eflag_global) eng_coul_thr[tid] += ecoul; + if (pair->eflag_global) thr->eng_coul += ecoul; if (pair->eflag_atom) { - double epairatom = ecoul/n; - for (i = 0; i < n; i++) eatom_thr[tid][list[i]] += epairatom; + double epairatom = ecoul/static_cast<double>(n); + for (int i = 0; i < n; i++) thr->eatom_pair[list[i]] += epairatom; } } if (pair->vflag_either) { - if (pair->vflag_global) { - virial_thr[tid][0] += v[0]; - virial_thr[tid][1] += v[1]; - virial_thr[tid][2] += v[2]; - virial_thr[tid][3] += v[3]; - virial_thr[tid][4] += v[4]; - virial_thr[tid][5] += v[5]; - } + if (pair->vflag_global) + v_tally(thr->virial_pair,v); if (pair->vflag_atom) { - v[0] /= n; - v[1] /= n; - v[2] /= n; - v[3] /= n; - v[4] /= n; - v[5] /= n; - for (i = 0; i < n; i++) { - j = list[i]; - vatom_thr[tid][j][0] += v[0]; - vatom_thr[tid][j][1] += v[1]; - vatom_thr[tid][j][2] += v[2]; - vatom_thr[tid][j][3] += v[3]; - vatom_thr[tid][j][4] += v[4]; - vatom_thr[tid][j][5] += v[5]; + const double s = 1.0/static_cast<double>(n); + double vtmp[6]; + + vtmp[0] = s * v[0]; + vtmp[1] = s * v[1]; + vtmp[2] = s * v[2]; + vtmp[3] = s * v[3]; + vtmp[4] = s * v[4]; + vtmp[5] = s * v[5]; + + for (int i = 0; i < n; i++) { + const int j = list[i]; + v_tally(thr->vatom_pair[j],vtmp); + } + } + } +} + +/* 
----------------------------------------------------------------------
+   tally energy and virial into global and per-atom accumulators
+   bond version: contributions of the bond between atoms i and j are
+   accumulated in the per-thread ThrData instance "thr"
+   energy and virial are shared in equal halves between i and j;
+   with newton_bond off, only atoms with index < nlocal are credited
+------------------------------------------------------------------------- */
+
+void ThrOMP::ev_tally_thr(Bond * const bond, const int i, const int j, const int nlocal,
+                          const int newton_bond, const double ebond, const double fbond,
+                          const double delx, const double dely, const double delz,
+                          ThrData * const thr)
+{
+  if (bond->eflag_either) {
+    const double ebondhalf = 0.5*ebond;
+    if (newton_bond) {
+      // newton_bond on: the full energy is tallied regardless of the atom indices
+      if (bond->eflag_global)
+        thr->eng_bond += ebond;
+      if (bond->eflag_atom) {
+        thr->eatom_bond[i] += ebondhalf;
+        thr->eatom_bond[j] += ebondhalf;
+      }
+    } else {
+      // newton_bond off: each atom contributes its half only if its index is < nlocal
+      if (bond->eflag_global) {
+        if (i < nlocal) thr->eng_bond += ebondhalf;
+        if (j < nlocal) thr->eng_bond += ebondhalf;
+      }
+      if (bond->eflag_atom) {
+        if (i < nlocal) thr->eatom_bond[i] += ebondhalf;
+        if (j < nlocal) thr->eatom_bond[j] += ebondhalf;
+      }
+    }
+  }
+
+  if (bond->vflag_either) {
+    double v[6];
+
+    // virial components in the order xx, yy, zz, xy, xz, yz
+    v[0] = delx*delx*fbond;
+    v[1] = dely*dely*fbond;
+    v[2] = delz*delz*fbond;
+    v[3] = delx*dely*fbond;
+    v[4] = delx*delz*fbond;
+    v[5] = dely*delz*fbond;
+
+    if (bond->vflag_global) {
+      if (newton_bond)
+        v_tally(thr->virial_bond,v);
+      else {
+        if (i < nlocal)
+          v_tally(thr->virial_bond,0.5,v);
+        if (j < nlocal)
+          v_tally(thr->virial_bond,0.5,v);
+      }
+    }
+
+    if (bond->vflag_atom) {
+      // from here on v holds the per-atom share (one half of the bond virial)
+      v[0] *= 0.5;
+      v[1] *= 0.5;
+      v[2] *= 0.5;
+      v[3] *= 0.5;
+      v[4] *= 0.5;
+      v[5] *= 0.5;
+
+      if (newton_bond) {
+        v_tally(thr->vatom_bond[i],v);
+        v_tally(thr->vatom_bond[j],v);
+      } else {
+        // gate every atom on its own index, matching the per-atom
+        // energy tally above (atom i must be tested against i, not j)
+        if (i < nlocal)
+          v_tally(thr->vatom_bond[i],v);
+        if (j < nlocal)
+          v_tally(thr->vatom_bond[j],v);
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally energy and virial into global and per-atom accumulators
+   virial = r1F1 + r2F2 + r3F3 = (r1-r2) F1 + (r3-r2) F3 = del1*f1 + del2*f3
+------------------------------------------------------------------------- */
+
+void ThrOMP::ev_tally_thr(Angle * const angle, const int i, const int j, const int k,
+                          const int nlocal, const int newton_bond, const double eangle,
+                          const double * const f1, const double * const f3,
+                          const double delx1, const double dely1, const double delz1,
+                          const double delx2, const double dely2, const double delz2,
+                          ThrData * const thr)
+{
+  // all contributions go into the per-thread accumulators in "thr".
+  // energy and virial are shared in equal thirds among atoms i, j and k;
+  // with newton_bond off, only atoms with index < nlocal are credited.
+
+  if (angle->eflag_either) {
+    const double eanglethird = THIRD*eangle;
+    if (newton_bond) {
+      if (angle->eflag_global)
+        thr->eng_angle += eangle;
+      if (angle->eflag_atom) {
+        thr->eatom_angle[i] += eanglethird;
+        thr->eatom_angle[j] += eanglethird;
+        thr->eatom_angle[k] += eanglethird;
       }
+    } else {
+      if (angle->eflag_global) {
+        if (i < nlocal) thr->eng_angle += eanglethird;
+        if (j < nlocal) thr->eng_angle += eanglethird;
+        if (k < nlocal) thr->eng_angle += eanglethird;
+      }
+      if (angle->eflag_atom) {
+        if (i < nlocal) thr->eatom_angle[i] += eanglethird;
+        if (j < nlocal) thr->eatom_angle[j] += eanglethird;
+        if (k < nlocal) thr->eatom_angle[k] += eanglethird;
+      }
+    }
+  }
+
+  if (angle->vflag_either) {
+    double v[6];
+
+    // virial components in the order xx, yy, zz, xy, xz, yz
+    v[0] = delx1*f1[0] + delx2*f3[0];
+    v[1] = dely1*f1[1] + dely2*f3[1];
+    v[2] = delz1*f1[2] + delz2*f3[2];
+    v[3] = delx1*f1[1] + delx2*f3[1];
+    v[4] = delx1*f1[2] + delx2*f3[2];
+    v[5] = dely1*f1[2] + dely2*f3[2];
+
+    if (angle->vflag_global) {
+      if (newton_bond) {
+        v_tally(thr->virial_angle,v);
+      } else {
+        // scale by the number of the three atoms whose index is < nlocal
+        int cnt = 0;
+        if (i < nlocal) ++cnt;
+        if (j < nlocal) ++cnt;
+        if (k < nlocal) ++cnt;
+        v_tally(thr->virial_angle,cnt*THIRD,v);
+      }
+    }
+
+    if (angle->vflag_atom) {
+      // from here on v holds the per-atom share (one third of the angle virial)
+      v[0] *= THIRD;
+      v[1] *= THIRD;
+      v[2] *= THIRD;
+      v[3] *= THIRD;
+      v[4] *= THIRD;
+      v[5] *= THIRD;
+
+      if (newton_bond) {
+        v_tally(thr->vatom_angle[i],v);
+        v_tally(thr->vatom_angle[j],v);
+        v_tally(thr->vatom_angle[k],v);
+      } else {
+        // gate every atom on its own index, matching the per-atom
+        // energy tally above (atom i must be tested against i, not j)
+        if (i < nlocal) v_tally(thr->vatom_angle[i],v);
+        if (j < nlocal) v_tally(thr->vatom_angle[j],v);
+        if (k < nlocal) v_tally(thr->vatom_angle[k],v);
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally energy and virial from 1-3 repulsion of SDK angle into accumulators
+   epair and the virial are shared in equal halves between atoms i1 and i3;
+   with newton_bond off, only atoms with index < nlocal are credited
+------------------------------------------------------------------------- */
+
+void ThrOMP::ev_tally13_thr(Angle * const angle, const int i1, const int i3,
+                            const int nlocal, const int newton_bond,
+                            const double epair, const double fpair,
+                            const double delx, const double dely,
+                            const double delz, ThrData * const thr)
+{
+
+  if (angle->eflag_either) {
+    const double epairhalf = 0.5 * epair;
+
+    if (angle->eflag_global) {
+      if (newton_bond || i1 < nlocal)
+        thr->eng_angle += epairhalf;
+      if (newton_bond || i3 < nlocal)
+        thr->eng_angle += epairhalf;
+    }
+
+    if (angle->eflag_atom) {
+      if (newton_bond || i1 < nlocal) thr->eatom_angle[i1] += epairhalf;
+      if (newton_bond || i3 < nlocal) thr->eatom_angle[i3] += epairhalf;
     }
   }
+
+  if (angle->vflag_either) {
+    double v[6];
+    v[0] = delx*delx*fpair;
+    v[1] = dely*dely*fpair;
+    v[2] = delz*delz*fpair;
+    v[3] = delx*dely*fpair;
+    v[4] = delx*delz*fpair;
+    v[5] = dely*delz*fpair;
+
+    if (angle->vflag_global) {
+      double * const va = thr->virial_angle;
+      if (newton_bond || i1 < nlocal) v_tally(va,0.5,v);
+      if (newton_bond || i3 < nlocal) v_tally(va,0.5,v);
+    }
+
+    if (angle->vflag_atom) {
+      if (newton_bond || i1 < nlocal) {
+        double * const va = thr->vatom_angle[i1];
+        v_tally(va,0.5,v);
+      }
+      if (newton_bond || i3 < nlocal) {
+        double * const va = thr->vatom_angle[i3];
+        v_tally(va,0.5,v);
+      }
+    }
+  }
 }
+
 /* ----------------------------------------------------------------------
    tally energy and virial into global and per-atom accumulators
    virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4
@@ -562,40 +891,48 @@ void ThrOMP::ev_tally_list_thr(Pair *pair, int n, int *list, double ecoul, doubl
           = vb1*f1 + vb2*f3 + (vb3+vb2)*f4
 ------------------------------------------------------------------------- */

-void
ThrOMP::ev_tally_thr(Dihedral *dihed, int i1, int i2, int i3, int i4, - int nlocal, int newton_bond, - double edihedral, double *f1, double *f3, double *f4, - double vb1x, double vb1y, double vb1z, - double vb2x, double vb2y, double vb2z, - double vb3x, double vb3y, double vb3z, int tid) +void ThrOMP::ev_tally_thr(Dihedral * const dihed, const int i1, const int i2, + const int i3, const int i4, const int nlocal, + const int newton_bond, const double edihedral, + const double * const f1, const double * const f3, + const double * const f4, const double vb1x, + const double vb1y, const double vb1z, const double vb2x, + const double vb2y, const double vb2z, const double vb3x, + const double vb3y, const double vb3z, ThrData * const thr) { - double edihedralquarter,v[6]; - int cnt; if (dihed->eflag_either) { if (dihed->eflag_global) { if (newton_bond) { - eng_bond_thr[tid] += edihedral; + thr->eng_dihed += edihedral; } else { - edihedralquarter = 0.25*edihedral; - cnt = 0; + const double edihedralquarter = 0.25*edihedral; + int cnt = 0; if (i1 < nlocal) ++cnt; if (i2 < nlocal) ++cnt; if (i3 < nlocal) ++cnt; if (i4 < nlocal) ++cnt; - eng_bond_thr[tid] += static_cast<double>(cnt) * edihedralquarter; + thr->eng_dihed += static_cast<double>(cnt)*edihedralquarter; } } if (dihed->eflag_atom) { - edihedralquarter = 0.25*edihedral; - if (newton_bond || i1 < nlocal) eatom_thr[tid][i1] += edihedralquarter; - if (newton_bond || i2 < nlocal) eatom_thr[tid][i2] += edihedralquarter; - if (newton_bond || i3 < nlocal) eatom_thr[tid][i3] += edihedralquarter; - if (newton_bond || i4 < nlocal) eatom_thr[tid][i4] += edihedralquarter; + const double edihedralquarter = 0.25*edihedral; + if (newton_bond) { + thr->eatom_dihed[i1] += edihedralquarter; + thr->eatom_dihed[i2] += edihedralquarter; + thr->eatom_dihed[i3] += edihedralquarter; + thr->eatom_dihed[i4] += edihedralquarter; + } else { + if (i1 < nlocal) thr->eatom_dihed[i1] += edihedralquarter; + if (i2 < nlocal) thr->eatom_dihed[i2] += 
edihedralquarter; + if (i3 < nlocal) thr->eatom_dihed[i3] += edihedralquarter; + if (i4 < nlocal) thr->eatom_dihed[i4] += edihedralquarter; + } } } if (dihed->vflag_either) { + double v[6]; v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0]; v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1]; v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2]; @@ -605,80 +942,127 @@ void ThrOMP::ev_tally_thr(Dihedral *dihed, int i1, int i2, int i3, int i4, if (dihed->vflag_global) { if (newton_bond) { - virial_thr[tid][0] += v[0]; - virial_thr[tid][1] += v[1]; - virial_thr[tid][2] += v[2]; - virial_thr[tid][3] += v[3]; - virial_thr[tid][4] += v[4]; - virial_thr[tid][5] += v[5]; + v_tally(thr->virial_dihed,v); } else { - if (i1 < nlocal) { - virial_thr[tid][0] += 0.25*v[0]; - virial_thr[tid][1] += 0.25*v[1]; - virial_thr[tid][2] += 0.25*v[2]; - virial_thr[tid][3] += 0.25*v[3]; - virial_thr[tid][4] += 0.25*v[4]; - virial_thr[tid][5] += 0.25*v[5]; - } - if (i2 < nlocal) { - virial_thr[tid][0] += 0.25*v[0]; - virial_thr[tid][1] += 0.25*v[1]; - virial_thr[tid][2] += 0.25*v[2]; - virial_thr[tid][3] += 0.25*v[3]; - virial_thr[tid][4] += 0.25*v[4]; - virial_thr[tid][5] += 0.25*v[5]; - } - if (i3 < nlocal) { - virial_thr[tid][0] += 0.25*v[0]; - virial_thr[tid][1] += 0.25*v[1]; - virial_thr[tid][2] += 0.25*v[2]; - virial_thr[tid][3] += 0.25*v[3]; - virial_thr[tid][4] += 0.25*v[4]; - virial_thr[tid][5] += 0.25*v[5]; - } - if (i4 < nlocal) { - virial_thr[tid][0] += 0.25*v[0]; - virial_thr[tid][1] += 0.25*v[1]; - virial_thr[tid][2] += 0.25*v[2]; - virial_thr[tid][3] += 0.25*v[3]; - virial_thr[tid][4] += 0.25*v[4]; - virial_thr[tid][5] += 0.25*v[5]; - } + int cnt = 0; + if (i1 < nlocal) ++cnt; + if (i2 < nlocal) ++cnt; + if (i3 < nlocal) ++cnt; + if (i4 < nlocal) ++cnt; + v_tally(thr->virial_dihed,0.25*static_cast<double>(cnt),v); } } + v[0] *= 0.25; + v[1] *= 0.25; + v[2] *= 0.25; + v[3] *= 0.25; + v[4] *= 0.25; + v[5] *= 0.25; + if (dihed->vflag_atom) { - if (newton_bond || i1 < nlocal) { - 
vatom_thr[tid][i1][0] += 0.25*v[0]; - vatom_thr[tid][i1][1] += 0.25*v[1]; - vatom_thr[tid][i1][2] += 0.25*v[2]; - vatom_thr[tid][i1][3] += 0.25*v[3]; - vatom_thr[tid][i1][4] += 0.25*v[4]; - vatom_thr[tid][i1][5] += 0.25*v[5]; - } - if (newton_bond || i2 < nlocal) { - vatom_thr[tid][i2][0] += 0.25*v[0]; - vatom_thr[tid][i2][1] += 0.25*v[1]; - vatom_thr[tid][i2][2] += 0.25*v[2]; - vatom_thr[tid][i2][3] += 0.25*v[3]; - vatom_thr[tid][i2][4] += 0.25*v[4]; - vatom_thr[tid][i2][5] += 0.25*v[5]; + if (newton_bond) { + v_tally(thr->vatom_dihed[i1],v); + v_tally(thr->vatom_dihed[i2],v); + v_tally(thr->vatom_dihed[i3],v); + v_tally(thr->vatom_dihed[i4],v); + } else { + if (i1 < nlocal) v_tally(thr->vatom_dihed[i1],v); + if (i2 < nlocal) v_tally(thr->vatom_dihed[i2],v); + if (i3 < nlocal) v_tally(thr->vatom_dihed[i3],v); + if (i4 < nlocal) v_tally(thr->vatom_dihed[i4],v); } - if (newton_bond || i3 < nlocal) { - vatom_thr[tid][i3][0] += 0.25*v[0]; - vatom_thr[tid][i3][1] += 0.25*v[1]; - vatom_thr[tid][i3][2] += 0.25*v[2]; - vatom_thr[tid][i3][3] += 0.25*v[3]; - vatom_thr[tid][i3][4] += 0.25*v[4]; - vatom_thr[tid][i3][5] += 0.25*v[5]; - } - if (newton_bond || i4 < nlocal) { - vatom_thr[tid][i4][0] += 0.25*v[0]; - vatom_thr[tid][i4][1] += 0.25*v[1]; - vatom_thr[tid][i4][2] += 0.25*v[2]; - vatom_thr[tid][i4][3] += 0.25*v[3]; - vatom_thr[tid][i4][4] += 0.25*v[4]; - vatom_thr[tid][i4][5] += 0.25*v[5]; + } + } +} + +/* ---------------------------------------------------------------------- + tally energy and virial into global and per-atom accumulators + virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4 + = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4 + = vb1*f1 + vb2*f3 + (vb3+vb2)*f4 +------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally_thr(Improper * const imprp, const int i1, const int i2, + const int i3, const int i4, const int nlocal, + const int newton_bond, const double eimproper, + const double * const 
f1, const double * const f3, + const double * const f4, const double vb1x, + const double vb1y, const double vb1z, const double vb2x, + const double vb2y, const double vb2z, const double vb3x, + const double vb3y, const double vb3z, ThrData * const thr) +{ + + if (imprp->eflag_either) { + if (imprp->eflag_global) { + if (newton_bond) { + thr->eng_imprp += eimproper; + } else { + const double eimproperquarter = 0.25*eimproper; + int cnt = 0; + if (i1 < nlocal) ++cnt; + if (i2 < nlocal) ++cnt; + if (i3 < nlocal) ++cnt; + if (i4 < nlocal) ++cnt; + thr->eng_imprp += static_cast<double>(cnt)*eimproperquarter; + } + } + if (imprp->eflag_atom) { + const double eimproperquarter = 0.25*eimproper; + if (newton_bond) { + thr->eatom_imprp[i1] += eimproperquarter; + thr->eatom_imprp[i2] += eimproperquarter; + thr->eatom_imprp[i3] += eimproperquarter; + thr->eatom_imprp[i4] += eimproperquarter; + } else { + if (i1 < nlocal) thr->eatom_imprp[i1] += eimproperquarter; + if (i2 < nlocal) thr->eatom_imprp[i2] += eimproperquarter; + if (i3 < nlocal) thr->eatom_imprp[i3] += eimproperquarter; + if (i4 < nlocal) thr->eatom_imprp[i4] += eimproperquarter; + } + } + } + + if (imprp->vflag_either) { + double v[6]; + v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0]; + v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1]; + v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2]; + v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1]; + v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2]; + v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2]; + + if (imprp->vflag_global) { + if (newton_bond) { + v_tally(thr->virial_imprp,v); + } else { + int cnt = 0; + if (i1 < nlocal) ++cnt; + if (i2 < nlocal) ++cnt; + if (i3 < nlocal) ++cnt; + if (i4 < nlocal) ++cnt; + v_tally(thr->virial_imprp,0.25*static_cast<double>(cnt),v); + } + } + + v[0] *= 0.25; + v[1] *= 0.25; + v[2] *= 0.25; + v[3] *= 0.25; + v[4] *= 0.25; + v[5] *= 0.25; + + if (imprp->vflag_atom) { + if (newton_bond) { + v_tally(thr->vatom_imprp[i1],v); + 
v_tally(thr->vatom_imprp[i2],v); + v_tally(thr->vatom_imprp[i3],v); + v_tally(thr->vatom_imprp[i4],v); + } else { + if (i1 < nlocal) v_tally(thr->vatom_imprp[i1],v); + if (i2 < nlocal) v_tally(thr->vatom_imprp[i2],v); + if (i3 < nlocal) v_tally(thr->vatom_imprp[i3],v); + if (i4 < nlocal) v_tally(thr->vatom_imprp[i4],v); } } } @@ -690,7 +1074,8 @@ void ThrOMP::ev_tally_thr(Dihedral *dihed, int i1, int i2, int i3, int i4, fpair is magnitude of force on atom I ------------------------------------------------------------------------- */ -void ThrOMP::v_tally2_thr(int i, int j, double fpair, double *drij, int tid) +void ThrOMP::v_tally2_thr(const int i, const int j, const double fpair, + const double * const drij, ThrData * const thr) { double v[6]; @@ -701,10 +1086,8 @@ void ThrOMP::v_tally2_thr(int i, int j, double fpair, double *drij, int tid) v[4] = 0.5 * drij[0]*drij[2]*fpair; v[5] = 0.5 * drij[1]*drij[2]*fpair; - vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2]; - vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5]; - vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2]; - vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5]; + v_tally(thr->vatom_pair[i],v); + v_tally(thr->vatom_pair[j],v); } /* ---------------------------------------------------------------------- @@ -712,8 +1095,10 @@ void ThrOMP::v_tally2_thr(int i, int j, double fpair, double *drij, int tid) called by AIREBO and Tersoff potential, newton_pair is always on ------------------------------------------------------------------------- */ -void ThrOMP::v_tally3_thr(int i, int j, int k, double *fi, double *fj, - double *drik, double *drjk, int tid) +void ThrOMP::v_tally3_thr(const int i, const int j, const int k, + const double * const fi, const double * const fj, + const double * const drik, const double * const drjk, + ThrData * const thr) { double v[6]; 
@@ -724,12 +1109,9 @@ void ThrOMP::v_tally3_thr(int i, int j, int k, double *fi, double *fj, v[4] = THIRD * (drik[0]*fi[2] + drjk[0]*fj[2]); v[5] = THIRD * (drik[1]*fi[2] + drjk[1]*fj[2]); - vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2]; - vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5]; - vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2]; - vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5]; - vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1]; vatom_thr[tid][k][2] += v[2]; - vatom_thr[tid][k][3] += v[3]; vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5]; + v_tally(thr->vatom_pair[i],v); + v_tally(thr->vatom_pair[j],v); + v_tally(thr->vatom_pair[k],v); } /* ---------------------------------------------------------------------- @@ -737,9 +1119,11 @@ void ThrOMP::v_tally3_thr(int i, int j, int k, double *fi, double *fj, called by AIREBO potential, newton_pair is always on ------------------------------------------------------------------------- */ -void ThrOMP::v_tally4_thr(int i, int j, int k, int m, - double *fi, double *fj, double *fk, - double *drim, double *drjm, double *drkm, int tid) +void ThrOMP::v_tally4_thr(const int i, const int j, const int k, const int m, + const double * const fi, const double * const fj, + const double * const fk, const double * const drim, + const double * const drjm, const double * const drkm, + ThrData * const thr) { double v[6]; @@ -750,84 +1134,17 @@ void ThrOMP::v_tally4_thr(int i, int j, int k, int m, v[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]); v[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]); - vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2]; - vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5]; - vatom_thr[tid][j][0] += v[0]; 
vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2]; - vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5]; - vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1]; vatom_thr[tid][k][2] += v[2]; - vatom_thr[tid][k][3] += v[3]; vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5]; - vatom_thr[tid][m][0] += v[0]; vatom_thr[tid][m][1] += v[1]; vatom_thr[tid][m][2] += v[2]; - vatom_thr[tid][m][3] += v[3]; vatom_thr[tid][m][4] += v[4]; vatom_thr[tid][m][5] += v[5]; -} - -/* ---------------------------------------------------------------------- */ - -// set loop range thread id, and force array offset for threaded runs. -double **ThrOMP::loop_setup_thr(double **f, int &ifrom, int &ito, int &tid, - int inum, int nall, int nthreads) -{ -#if defined(_OPENMP) - tid = omp_get_thread_num(); - - // each thread works on a fixed chunk of atoms. - const int idelta = 1 + inum/nthreads; - ifrom = tid*idelta; - ito = ifrom + idelta; - if (ito > inum) - ito = inum; - - return f + nall*tid; -#else - tid = 0; - ifrom = 0; - ito = inum; - return f; -#endif -} - -/* ---------------------------------------------------------------------- */ - -// reduce per thread data into the first part of the data -// array that is used for the non-threaded parts and reset -// the temporary storage to 0.0. this routine depends on -// multi-dimensional arrays like force stored in this order -// x1,y1,z1,x2,y2,z2,... -// we need to post a barrier to wait until all threads are done -// with writing to the array . -void ThrOMP::data_reduce_thr(double *dall, int nall, int nthreads, - int ndim, int tid) -{ -#if defined(_OPENMP) - // NOOP in non-threaded execution. - if (nthreads == 1) return; -#pragma omp barrier - { - const int nvals = ndim*nall; - const int idelta = nvals/nthreads + 1; - const int ifrom = tid*idelta; - const int ito = ((ifrom + idelta) > nvals) ? 
nvals : (ifrom + idelta); - - for (int m = ifrom; m < ito; ++m) { - for (int n = 1; n < nthreads; ++n) { - dall[m] += dall[n*nvals + m]; - dall[n*nvals + m] = 0.0; - } - } - } -#else - // NOOP in non-threaded execution. - return; -#endif + v_tally(thr->vatom_pair[i],v); + v_tally(thr->vatom_pair[j],v); + v_tally(thr->vatom_pair[k],v); + v_tally(thr->vatom_pair[m],v); } /* ---------------------------------------------------------------------- */ double ThrOMP::memory_usage_thr() { - const int nthreads=lmp->comm->nthreads; - - double bytes = nthreads * (3 + 7) * sizeof(double); - bytes += nthreads * maxeatom_thr * sizeof(double); - bytes += nthreads * maxvatom_thr * 6 * sizeof(double); + double bytes=0.0; + return bytes; } diff --git a/src/USER-OMP/thr_omp.h b/src/USER-OMP/thr_omp.h index 9966c9de00e85bd77e54e7034ec22deb992816d2..32f7045124b4a4dc75734c105d14c587a72a75ff 100644 --- a/src/USER-OMP/thr_omp.h +++ b/src/USER-OMP/thr_omp.h @@ -19,39 +19,28 @@ #define LMP_THR_OMP_H #include "pointers.h" +#include "fix_omp.h" +#include "thr_data.h" namespace LAMMPS_NS { // forward declarations class Pair; +class Bond; +class Angle; class Dihedral; +class Improper; +class KSpace; +class Fix; class ThrOMP { - public: - struct global { - double eng_vdwl; - double eng_coul; - double eng_bond; - double virial[6]; - }; protected: - const int thr_style; - enum {PAIR=1, BOND, ANGLE, DIHEDRAL, IMPROPER, KSPACE, FIX, COMPUTE}; - - LAMMPS *lmp; // reference to base lammps object. - - double *eng_vdwl_thr; // per thread accumulated vdw energy - double *eng_coul_thr; // per thread accumulated coulomb energies - double *eng_bond_thr; // per thread accumlated bonded energy + LAMMPS *lmp; // reference to base lammps object. 
+ FixOMP *fix; // pointer to fix_omp; - double **virial_thr; // per thread virial - double **eatom_thr; // per thread per atom energy - double ***vatom_thr; // per thread per atom virial + const int thr_style; - int maxeatom_thr, maxvatom_thr; - int evflag_global, evflag_atom; - public: ThrOMP(LAMMPS *, int); virtual ~ThrOMP(); @@ -65,50 +54,105 @@ class ThrOMP { { ; } }; + enum {THR_NONE=0,THR_PAIR=1,THR_BOND=1<<1,THR_ANGLE=1<<2, + THR_DIHEDRAL=1<<3,THR_IMPROPER=1<<4,THR_KSPACE=1<<5, + THR_CHARMM=1<<6,THR_PROXY=1<<7,THR_HYBRID=1<<8,THR_FIX=1<<9}; + protected: - // extra ev_tally work for threaded styles - void ev_setup_thr(Pair *); - void ev_setup_thr(Dihedral *); + // extra ev_tally setup work for threaded styles + void ev_setup_thr(int, int, int, double *, double **, ThrData *); - void ev_reduce_thr(Pair *); - void ev_reduce_thr(Dihedral *); + // compute global per thread virial contribution from per-thread force + void virial_fdotr_compute_thr(double * const, const double * const * const, + const double * const * const, + const int, const int, const int); - private: - // internal method to be used by multiple ev_setup_thr() methods - void ev_setup_acc_thr(int, int, int, int, int, int); + // reduce per thread data as needed + void reduce_thr(void * const style, const int eflag, const int vflag, ThrData * const thr, const int nproxy=0); protected: + // threading adapted versions of the ev_tally infrastructure // style specific versions (need access to style class flags) - void ev_tally_thr(Pair *, int, int, int, int, double, double, - double, double, double, double, int); - void ev_tally_xyz_thr(Pair *, int, int, int, int, double, double, - double, double, double, double, double, double, int); - void ev_tally3_thr(Pair *, int, int, int, double, double, - double *, double *, double *, double *, int); - void ev_tally4_thr(Pair *, int, int, int, int, double, - double *, double *, double *, - double *, double *, double *, int); - void ev_tally_list_thr(Pair *, int, 
int *, double , double *, int); - - void ev_tally_thr(Dihedral *, int, int, int, int, int, int, double, - double *, double *, double *, double, double, double, - double, double, double, double, double, double, int); - // style independent versions - void v_tally2_thr(int, int, double, double *, int); - void v_tally3_thr(int, int, int, double *, double *, double *, double *, int); - void v_tally4_thr(int, int, int, int, double *, double *, double *, - double *, double *, double *, int); + // Pair + void e_tally_thr(Pair * const, const int, const int, const int, + const int, const double, const double, ThrData * const); + void v_tally_thr(Pair * const, const int, const int, const int, + const int, const double * const, ThrData * const); + + void ev_tally_thr(Pair * const, const int, const int, const int, const int, + const double, const double, const double, const double, + const double, const double, ThrData * const); + void ev_tally_xyz_thr(Pair * const, const int, const int, const int, + const int, const double, const double, const double, + const double, const double, const double, + const double, const double, ThrData * const); + void ev_tally3_thr(Pair * const, const int, const int, const int, const double, + const double, const double * const, const double * const, + const double * const, const double * const, ThrData * const); + void ev_tally4_thr(Pair * const, const int, const int, const int, const int, + const double, const double * const, const double * const, + const double * const, const double * const, const double * const, + const double * const, ThrData * const); + + // Bond + void ev_tally_thr(Bond * const, const int, const int, const int, const int, + const double, const double, const double, const double, + const double, ThrData * const); + + // Angle + void ev_tally_thr(Angle * const, const int, const int, const int, const int, const int, + const double, const double * const, const double * const, + const double, const double, const double, const 
double, const double, + const double, ThrData * const thr); + void ev_tally13_thr(Angle * const, const int, const int, const int, const int, + const double, const double, const double, const double, + const double, ThrData * const thr); + + // Dihedral + void ev_tally_thr(Dihedral * const, const int, const int, const int, const int, const int, + const int, const double, const double * const, const double * const, + const double * const, const double, const double, const double, + const double, const double, const double, const double, const double, + const double, ThrData * const); + + // Improper + void ev_tally_thr(Improper * const, const int, const int, const int, const int, const int, + const int, const double, const double * const, const double * const, + const double * const, const double, const double, const double, + const double, const double, const double, const double, const double, + const double, ThrData * const); - protected: - // set loop range, thread id, and force array offset for threaded runs. - double **loop_setup_thr(double **, int &, int &, int &, int, int, int); - - // reduce per thread data into the first part of the array - void data_reduce_thr(double *, int, int, int, int); + // style independent versions + void v_tally2_thr(const int, const int, const double, const double * const, ThrData * const); + void v_tally3_thr(const int, const int, const int, const double * const, const double * const, + const double * const, const double * const, ThrData * const); + void v_tally4_thr(const int, const int, const int, const int, const double * const, + const double * const, const double * const, const double * const, + const double * const, const double * const, ThrData * const); + void ev_tally_list_thr(Pair * const, const int, const int * const, + const double , const double * const , ThrData * const); }; +// set loop range and thread id for threaded runs. 
+static inline void loop_setup_thr(int &ifrom, int &ito, int &tid, + int inum, int nthreads, int nproxy=0) +{ +#if defined(_OPENMP) + tid = omp_get_thread_num(); + + // each thread works on a fixed chunk of atoms. + const int idelta = 1 + inum/(nthreads-nproxy); + ifrom = (tid-nproxy)*idelta; + ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + tid = 0; + ifrom = 0; + ito = inum; +#endif +} + } #endif