From 389584998643627304e1a0c4a9140f7325e898ab Mon Sep 17 00:00:00 2001 From: sjplimp <sjplimp@f3b2605a-c512-4ea7-a41b-209d697bcdaa> Date: Thu, 1 Dec 2011 17:05:22 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7264 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/USER-OMP/Install.sh | 19 +- src/USER-OMP/Package.sh | 38 +- src/USER-OMP/dihedral_charmm_omp.cpp | 53 +- src/USER-OMP/dihedral_charmm_omp.h | 6 +- src/USER-OMP/dihedral_class2_omp.cpp | 47 +- src/USER-OMP/dihedral_class2_omp.h | 4 +- .../dihedral_cosine_shift_exp_omp.cpp | 41 +- src/USER-OMP/dihedral_cosine_shift_exp_omp.h | 4 +- src/USER-OMP/dihedral_harmonic_omp.cpp | 40 +- src/USER-OMP/dihedral_harmonic_omp.h | 6 +- src/USER-OMP/dihedral_helix_omp.cpp | 53 +- src/USER-OMP/dihedral_helix_omp.h | 4 +- src/USER-OMP/dihedral_multi_harmonic_omp.cpp | 39 +- src/USER-OMP/dihedral_multi_harmonic_omp.h | 4 +- src/USER-OMP/dihedral_opls_omp.cpp | 41 +- src/USER-OMP/dihedral_opls_omp.h | 4 +- src/USER-OMP/fix_nve_sphere_omp.cpp | 46 +- src/USER-OMP/fix_shear_history_omp.cpp | 2 +- src/USER-OMP/pair_adp_omp.cpp | 76 +- src/USER-OMP/pair_adp_omp.h | 3 +- src/USER-OMP/pair_born_coul_long_omp.cpp | 40 +- src/USER-OMP/pair_born_coul_long_omp.h | 2 +- src/USER-OMP/pair_born_omp.cpp | 36 +- src/USER-OMP/pair_born_omp.h | 2 +- src/USER-OMP/pair_buck_coul_cut_omp.cpp | 39 +- src/USER-OMP/pair_buck_coul_cut_omp.h | 2 +- src/USER-OMP/pair_buck_coul_long_omp.cpp | 37 +- src/USER-OMP/pair_buck_coul_long_omp.h | 2 +- src/USER-OMP/pair_buck_coul_omp.cpp | 42 +- src/USER-OMP/pair_buck_coul_omp.h | 2 +- src/USER-OMP/pair_buck_omp.cpp | 36 +- src/USER-OMP/pair_buck_omp.h | 2 +- src/USER-OMP/pair_cdeam_omp.cpp | 108 +- src/USER-OMP/pair_cdeam_omp.h | 3 +- src/USER-OMP/pair_colloid_omp.cpp | 42 +- src/USER-OMP/pair_colloid_omp.h | 2 +- src/USER-OMP/pair_coul_cut_omp.cpp | 46 +- src/USER-OMP/pair_coul_cut_omp.h | 2 +- src/USER-OMP/pair_coul_debye_omp.cpp | 46 +- src/USER-OMP/pair_coul_debye_omp.h | 2 +- 
src/USER-OMP/pair_coul_long_omp.cpp | 46 +- src/USER-OMP/pair_coul_long_omp.h | 2 +- src/USER-OMP/pair_dipole_cut_omp.cpp | 53 +- src/USER-OMP/pair_dipole_cut_omp.h | 2 +- src/USER-OMP/pair_dipole_sf_omp.cpp | 53 +- src/USER-OMP/pair_dipole_sf_omp.h | 2 +- src/USER-OMP/pair_dpd_omp.cpp | 54 +- src/USER-OMP/pair_dpd_omp.h | 2 +- src/USER-OMP/pair_dpd_tstat_omp.cpp | 52 +- src/USER-OMP/pair_dpd_tstat_omp.h | 2 +- src/USER-OMP/pair_eam_omp.cpp | 65 +- src/USER-OMP/pair_eam_omp.h | 2 +- src/USER-OMP/pair_edip_omp.cpp | 44 +- src/USER-OMP/pair_edip_omp.h | 2 +- src/USER-OMP/pair_eim_omp.cpp | 82 +- src/USER-OMP/pair_eim_omp.h | 2 +- src/USER-OMP/pair_gauss_omp.cpp | 45 +- src/USER-OMP/pair_gauss_omp.h | 2 +- src/USER-OMP/pair_gayberne_omp.cpp | 66 +- src/USER-OMP/pair_gayberne_omp.h | 2 +- src/USER-OMP/pair_gran_hertz_history_omp.cpp | 51 +- src/USER-OMP/pair_gran_hertz_history_omp.h | 2 +- src/USER-OMP/pair_gran_hooke_history_omp.cpp | 93 +- src/USER-OMP/pair_gran_hooke_history_omp.h | 2 +- src/USER-OMP/pair_gran_hooke_omp.cpp | 50 +- src/USER-OMP/pair_gran_hooke_omp.h | 2 +- src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp | 53 +- src/USER-OMP/pair_hbond_dreiding_lj_omp.h | 2 +- .../pair_hbond_dreiding_morse_omp.cpp | 53 +- src/USER-OMP/pair_hbond_dreiding_morse_omp.h | 2 +- src/USER-OMP/pair_lj96_cut_omp.cpp | 44 +- src/USER-OMP/pair_lj96_cut_omp.h | 2 +- ...air_lj_charmm_coul_charmm_implicit_omp.cpp | 53 +- .../pair_lj_charmm_coul_charmm_implicit_omp.h | 2 +- .../pair_lj_charmm_coul_charmm_omp.cpp | 53 +- src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h | 2 +- src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp | 48 +- src/USER-OMP/pair_lj_charmm_coul_long_omp.h | 2 +- src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp | 50 +- src/USER-OMP/pair_lj_class2_coul_cut_omp.h | 2 +- src/USER-OMP/pair_lj_class2_coul_long_omp.cpp | 48 +- src/USER-OMP/pair_lj_class2_coul_long_omp.h | 2 +- src/USER-OMP/pair_lj_class2_omp.cpp | 44 +- src/USER-OMP/pair_lj_class2_omp.h | 2 +- 
src/USER-OMP/pair_lj_coul_omp.cpp | 52 +- src/USER-OMP/pair_lj_coul_omp.h | 2 +- src/USER-OMP/pair_lj_cubic_omp.cpp | 44 +- src/USER-OMP/pair_lj_cubic_omp.h | 2 +- src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp | 52 +- src/USER-OMP/pair_lj_cut_coul_cut_omp.h | 2 +- src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp | 50 +- src/USER-OMP/pair_lj_cut_coul_debye_omp.h | 2 +- src/USER-OMP/pair_lj_cut_coul_long_omp.cpp | 50 +- src/USER-OMP/pair_lj_cut_coul_long_omp.h | 2 +- .../pair_lj_cut_coul_long_tip4p_omp.cpp | 52 +- .../pair_lj_cut_coul_long_tip4p_omp.h | 3 +- src/USER-OMP/pair_lj_cut_omp.cpp | 44 +- src/USER-OMP/pair_lj_cut_omp.h | 2 +- src/USER-OMP/pair_lj_expand_omp.cpp | 44 +- src/USER-OMP/pair_lj_expand_omp.h | 2 +- .../pair_lj_gromacs_coul_gromacs_omp.cpp | 48 +- .../pair_lj_gromacs_coul_gromacs_omp.h | 2 +- src/USER-OMP/pair_lj_gromacs_omp.cpp | 44 +- src/USER-OMP/pair_lj_gromacs_omp.h | 2 +- src/USER-OMP/pair_lj_sf_omp.cpp | 44 +- src/USER-OMP/pair_lj_sf_omp.h | 2 +- src/USER-OMP/pair_lj_smooth_omp.cpp | 44 +- src/USER-OMP/pair_lj_smooth_omp.h | 2 +- src/USER-OMP/pair_morse_omp.cpp | 44 +- src/USER-OMP/pair_morse_omp.h | 2 +- src/USER-OMP/pair_peri_lps_omp.cpp | 59 +- src/USER-OMP/pair_peri_lps_omp.h | 2 +- src/USER-OMP/pair_peri_pmb_omp.cpp | 51 +- src/USER-OMP/pair_peri_pmb_omp.h | 2 +- src/USER-OMP/pair_resquared_omp.cpp | 73 +- src/USER-OMP/pair_resquared_omp.h | 2 +- src/USER-OMP/pair_soft_omp.cpp | 44 +- src/USER-OMP/pair_soft_omp.h | 2 +- src/USER-OMP/pair_sw_omp.cpp | 40 +- src/USER-OMP/pair_sw_omp.h | 2 +- src/USER-OMP/pair_table_omp.cpp | 44 +- src/USER-OMP/pair_table_omp.h | 2 +- src/USER-OMP/pair_tersoff_omp.cpp | 44 +- src/USER-OMP/pair_tersoff_omp.h | 2 +- src/USER-OMP/pair_yukawa_colloid_omp.cpp | 44 +- src/USER-OMP/pair_yukawa_colloid_omp.h | 2 +- src/USER-OMP/pair_yukawa_omp.cpp | 44 +- src/USER-OMP/pair_yukawa_omp.h | 2 +- src/USER-OMP/thr_omp.cpp | 1401 ++++++++++------- src/USER-OMP/thr_omp.h | 152 +- 130 files changed, 2525 insertions(+), 2395 
deletions(-) diff --git a/src/USER-OMP/Install.sh b/src/USER-OMP/Install.sh index db0beb5218..ec6fac4b92 100644 --- a/src/USER-OMP/Install.sh +++ b/src/USER-OMP/Install.sh @@ -1,10 +1,11 @@ # Install/unInstall package files in LAMMPS # do not install child files if parent does not exist -for file in *_omp.cpp *_omp.h; do +for file in *_omp.cpp *_omp.h pppm*proxy.h pppm*proxy.cpp; do # let us see if the "rain man" can count the toothpicks... - ofile=`echo $file | sed -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,` - + ofile=`echo $file | sed -e s,_pppm_tip4p_omp,_long_tip4p_omp, \ + -e s,pppm.\\*_proxy,pppm_omp, -e s,_pppm_omp,_long_omp, \ + -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,` if (test $1 = 1) then if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") then : # always install those files. @@ -18,3 +19,15 @@ for file in *_omp.cpp *_omp.h; do rm -f ../$file fi done + +if (test $1 = 1) then + + cp thr_data.h .. + cp thr_data.cpp .. + +elif (test $1 = 0) then + + rm -f ../thr_data.h + rm -f ../thr_data.cpp + +fi diff --git a/src/USER-OMP/Package.sh b/src/USER-OMP/Package.sh index 5a004c9187..6f577b2791 100644 --- a/src/USER-OMP/Package.sh +++ b/src/USER-OMP/Package.sh @@ -1,22 +1,40 @@ # Update package files in LAMMPS -# cp package file to src if doesn't exist or is different -# do not copy certain files if non-OMP versions do not exist -# do remove OpenMP style files that have no matching -# non-OpenMP version installed, e.g. after a package has been removed - -for file in *_omp.cpp *_omp.h; do +# copy package file to src if it doesn't exist or is different +# do not copy OpenMP style files if a non-OpenMP version does +# not exist. Do remove OpenMP style files that have no matching +# non-OpenMP version installed, e.g. after a package has been +# removed +for file in *_omp.cpp *_omp.h pppm*proxy.h pppm*proxy.cpp thr_data.h thr_data.cpp; do # let us see if the "rain man" can count the toothpicks... 
- ofile=`echo $file | sed -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,` - if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") then - : # always check for those files. + ofile=`echo $file | sed -e s,_pppm_tip4p_omp,_long_tip4p_omp, \ + -e s,pppm.\\*_proxy,pppm_omp, -e s,_pppm_omp,_long_omp, \ + -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,` + if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") \ + || (test $file = "thr_data.h") || (test $file = "thr_data.cpp") then + if (test ! -e ../$file) then + echo " creating src/$file" + cp $file .. + elif ! cmp -s $file ../$file ; then + echo " updating src/$file" + cp $file .. + fi elif (test ! -e ../$ofile) then if (test -e ../$file) then echo " removing src/$file" rm -f ../$file fi - continue + else + if (test ! -e ../$file) then + echo " creating src/$file" + cp $file .. + elif ! cmp -s $file ../$file ; then + echo " updating src/$file" + cp $file .. + fi fi +done +for file in thr_data.h thr_data.cpp; do if (test ! -e ../$file) then echo " creating src/$file" cp $file .. 
diff --git a/src/USER-OMP/dihedral_charmm_omp.cpp b/src/USER-OMP/dihedral_charmm_omp.cpp index 63bfc43270..b4d7e2e4ad 100644 --- a/src/USER-OMP/dihedral_charmm_omp.cpp +++ b/src/USER-OMP/dihedral_charmm_omp.cpp @@ -40,7 +40,6 @@ void DihedralCharmmOMP::compute(int eflag, int vflag) if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = 0; // insure pair->ev_tally() will use 1-4 virial contribution @@ -53,43 +52,34 @@ void DihedralCharmmOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - - // reduce contributions to non-bonded energy terms - for (int n = 0; n < nthreads; ++n) { - force->pair->eng_vdwl += eng_vdwl_thr[n]; - force->pair->eng_coul += eng_coul_thr[n]; - } } template <int EVFLAG, int EFLAG, int NEWTON_BOND> -void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid) +void DihedralCharmmOMP::eval(int nfrom, int nto, ThrData * const thr) { int i1,i2,i3,i4,i,m,n,type; @@ -105,12 +95,13 @@ void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid) edihedral = 0.0; - double **x = atom->x; - double *q = atom->q; - int *atomtype = atom->type; - int **dihedrallist = neighbor->dihedrallist; - int nlocal = atom->nlocal; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const atomtype = atom->type; + const int * const * const dihedrallist = neighbor->dihedrallist; + const double qqrd2e = force->qqrd2e; + const int nlocal = atom->nlocal; for (n = nfrom; n < nto; n++) { i1 = dihedrallist[n][0]; @@ -176,7 +167,7 @@ void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid) if (screen) { char str[128]; sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", - me,tid,update->ntimestep, + me,thr->get_tid(),update->ntimestep, atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); error->warning(FLERR,str,0); fprintf(screen," 1st atom: %d %g %g %g\n", @@ -282,7 +273,7 @@ void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid) if (EVFLAG) ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, - vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid); + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); // 1-4 LJ and Coulomb interactions // tally energy/virial in pair, using newton_bond as newton flag @@ -321,7 +312,7 @@ void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid) } if (EVFLAG) 
ev_tally_thr(force->pair,i1,i4,nlocal,NEWTON_BOND, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } } diff --git a/src/USER-OMP/dihedral_charmm_omp.h b/src/USER-OMP/dihedral_charmm_omp.h index a39ad83f7e..75ba6410d5 100644 --- a/src/USER-OMP/dihedral_charmm_omp.h +++ b/src/USER-OMP/dihedral_charmm_omp.h @@ -1,4 +1,4 @@ -/* ---------------------------------------------------------------------- +/* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -33,13 +33,13 @@ class DihedralCharmmOMP : public DihedralCharmm, public ThrOMP { public: DihedralCharmmOMP(class LAMMPS *lmp) : - DihedralCharmm(lmp), ThrOMP(lmp,DIHEDRAL) {}; + DihedralCharmm(lmp), ThrOMP(lmp,THR_DIHEDRAL|THR_CHARMM) {}; virtual void compute(int, int); private: template <int EVFLAG, int EFLAG, int NEWTON_BOND> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/dihedral_class2_omp.cpp b/src/USER-OMP/dihedral_class2_omp.cpp index 7348296644..07e0fba6e1 100644 --- a/src/USER-OMP/dihedral_class2_omp.cpp +++ b/src/USER-OMP/dihedral_class2_omp.cpp @@ -39,7 +39,6 @@ void DihedralClass2OMP::compute(int eflag, int vflag) if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = 0; const int nall = atom->nlocal + atom->nghost; @@ -47,37 +46,34 @@ void DihedralClass2OMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, 
eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); } template <int EVFLAG, int EFLAG, int NEWTON_BOND> -void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid) +void DihedralClass2OMP::eval(int nfrom, int nto, ThrData * const thr) { int i1,i2,i3,i4,i,j,k,n,type; @@ -96,9 +92,10 @@ void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid) edihedral = 0.0; - double **x = atom->x; - int **dihedrallist = neighbor->dihedrallist; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; for (n = nfrom; n < nto; n++) { i1 = dihedrallist[n][0]; @@ -170,7 +167,7 @@ void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid) sc2 = sqrt(sin2); if (sc2 < SMALL) sc2 = SMALL; sc2 = 1.0/sc2; - + s1 = sc1 * sc1; s2 = sc2 * sc2; s12 = sc1 * sc2; @@ -179,12 +176,12 @@ void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid) // error check if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { - int me; - 
MPI_Comm_rank(world,&me); + int me = comm->me; + if (screen) { char str[128]; - sprintf(str,"Dihedral problem: %d " BIGINT_FORMAT " %d %d %d %d", - me,update->ntimestep, + sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); error->warning(FLERR,str,0); fprintf(screen," 1st atom: %d %g %g %g\n", @@ -526,7 +523,7 @@ void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid) if (EVFLAG) ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral, fabcd[0],fabcd[2],fabcd[3], - vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid); + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); } } diff --git a/src/USER-OMP/dihedral_class2_omp.h b/src/USER-OMP/dihedral_class2_omp.h index d26f2f8713..14a6c40edd 100644 --- a/src/USER-OMP/dihedral_class2_omp.h +++ b/src/USER-OMP/dihedral_class2_omp.h @@ -33,13 +33,13 @@ class DihedralClass2OMP : public DihedralClass2, public ThrOMP { public: DihedralClass2OMP(class LAMMPS *lmp) : - DihedralClass2(lmp), ThrOMP(lmp,DIHEDRAL) {}; + DihedralClass2(lmp), ThrOMP(lmp,THR_DIHEDRAL) {}; virtual void compute(int, int); private: template <int EVFLAG, int EFLAG, int NEWTON_BOND> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp index a6c027e92d..1a80e8a7cd 100644 --- a/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp +++ b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp @@ -39,7 +39,6 @@ void DihedralCosineShiftExpOMP::compute(int eflag, int vflag) if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = 0; const int nall = atom->nlocal + atom->nghost; @@ -47,37 +46,34 @@ void DihedralCosineShiftExpOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(shared) 
+#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); } template <int EVFLAG, int EFLAG, int NEWTON_BOND> -void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid) +void DihedralCosineShiftExpOMP::eval(int nfrom, int nto, ThrData * const thr) { int i1,i2,i3,i4,n,type; @@ -91,9 +87,10 @@ void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid) edihedral = 0.0; - double **x = atom->x; - int **dihedrallist = neighbor->dihedrallist; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; for (n = nfrom; n < nto; n++) { i1 = dihedrallist[n][0]; @@ -159,7 +156,7 @@ void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid) if (screen) { char str[128]; sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", - me,tid,update->ntimestep, + me,thr->get_tid(),update->ntimestep, atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); error->warning(FLERR,str,0); fprintf(screen," 1st atom: %d %g %g %g\n", @@ -172,7 +169,7 @@ void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid) me,x[i4][0],x[i4][1],x[i4][2]); } } - + if (c > 1.0) c = 1.0; if (c < -1.0) c = -1.0; @@ -257,7 +254,7 @@ void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid) if (EVFLAG) ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, - vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid); + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); } } diff --git a/src/USER-OMP/dihedral_cosine_shift_exp_omp.h b/src/USER-OMP/dihedral_cosine_shift_exp_omp.h index eb906ab953..54627c169b 100644 --- a/src/USER-OMP/dihedral_cosine_shift_exp_omp.h +++ b/src/USER-OMP/dihedral_cosine_shift_exp_omp.h @@ -33,13 +33,13 @@ class DihedralCosineShiftExpOMP : public DihedralCosineShiftExp, public ThrOMP { public: DihedralCosineShiftExpOMP(class LAMMPS *lmp) : - 
DihedralCosineShiftExp(lmp), ThrOMP(lmp,DIHEDRAL) {}; + DihedralCosineShiftExp(lmp), ThrOMP(lmp,THR_DIHEDRAL) {}; virtual void compute(int, int); private: template <int EVFLAG, int EFLAG, int NEWTON_BOND> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/dihedral_harmonic_omp.cpp b/src/USER-OMP/dihedral_harmonic_omp.cpp index 0fa24090a7..cdad9b6ab8 100644 --- a/src/USER-OMP/dihedral_harmonic_omp.cpp +++ b/src/USER-OMP/dihedral_harmonic_omp.cpp @@ -39,7 +39,6 @@ void DihedralHarmonicOMP::compute(int eflag, int vflag) if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = 0; const int nall = atom->nlocal + atom->nghost; @@ -47,37 +46,34 @@ void DihedralHarmonicOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); } template <int EVFLAG, int EFLAG, int NEWTON_BOND> -void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid) +void DihedralHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr) { int i1,i2,i3,i4,i,m,n,type; @@ -90,9 +86,10 @@ void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid) edihedral = 0.0; - double **x = atom->x; - int **dihedrallist = neighbor->dihedrallist; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; for (n = nfrom; n < nto; n++) { i1 = dihedrallist[n][0]; @@ -158,7 +155,7 @@ void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid) if (screen) { char str[128]; sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", - me,tid,update->ntimestep, + me,thr->get_tid(),update->ntimestep, atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); error->warning(FLERR,str,0); fprintf(screen," 1st atom: %d %g %g %g\n", @@ -264,7 +261,6 @@ void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid) if (EVFLAG) ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, - vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid); + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); } } - diff --git a/src/USER-OMP/dihedral_harmonic_omp.h b/src/USER-OMP/dihedral_harmonic_omp.h index 2d7bae64ee..8b8562ad90 100644 --- a/src/USER-OMP/dihedral_harmonic_omp.h +++ b/src/USER-OMP/dihedral_harmonic_omp.h @@ -1,4 +1,4 @@ -/* ---------------------------------------------------------------------- +/* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular 
Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -33,13 +33,13 @@ class DihedralHarmonicOMP : public DihedralHarmonic, public ThrOMP { public: DihedralHarmonicOMP(class LAMMPS *lmp) : - DihedralHarmonic(lmp), ThrOMP(lmp,DIHEDRAL) {}; + DihedralHarmonic(lmp), ThrOMP(lmp,THR_DIHEDRAL) {}; virtual void compute(int, int); private: template <int EVFLAG, int EFLAG, int NEWTON_BOND> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/dihedral_helix_omp.cpp b/src/USER-OMP/dihedral_helix_omp.cpp index 4ec701a0cb..b9b61982f9 100644 --- a/src/USER-OMP/dihedral_helix_omp.cpp +++ b/src/USER-OMP/dihedral_helix_omp.cpp @@ -42,7 +42,6 @@ void DihedralHelixOMP::compute(int eflag, int vflag) if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = 0; const int nall = atom->nlocal + atom->nghost; @@ -50,37 +49,34 @@ void DihedralHelixOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if 
(force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); } template <int EVFLAG, int EFLAG, int NEWTON_BOND> -void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid) +void DihedralHelixOMP::eval(int nfrom, int nto, ThrData * const thr) { int i1,i2,i3,i4,n,type; @@ -94,9 +90,10 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid) edihedral = 0.0; - double **x = atom->x; - int **dihedrallist = neighbor->dihedrallist; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; for (n = nfrom; n < nto; n++) { i1 = dihedrallist[n][0]; @@ -132,18 +129,18 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid) domain->minimum_image(vb3x,vb3y,vb3z); // c0 calculation - + sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z); sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z); sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z); - + rb1 = sqrt(sb1); rb3 = sqrt(sb3); - + c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3; // 1st and 2nd angle - + b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z; b1mag = sqrt(b1mag2); b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z; @@ -181,15 +178,16 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid) cz = vb1x*vb2y - vb1y*vb2x; cmag = sqrt(cx*cx + cy*cy + cz*cz); dx = (cx*vb3x + cy*vb3y + cz*vb3z)/cmag/b3mag; - + // error check if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { int me = comm->me; + if (screen) { char str[128]; sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", - me,tid,update->ntimestep, + 
me,thr->get_tid(),update->ntimestep, atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); error->warning(FLERR,str,0); fprintf(screen," 1st atom: %d %g %g %g\n", @@ -202,7 +200,7 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid) me,x[i4][0],x[i4][1],x[i4][2]); } } - + if (c > 1.0) c = 1.0; if (c < -1.0) c = -1.0; @@ -217,7 +215,6 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid) if (EFLAG) edihedral = aphi[type]*(1.0 - c) + bphi[type]*(1.0 + cos(3.0*phi)) + cphi[type]*(1.0 + cos(phi + MY_PI4)); -; a = pd; c = c * a; @@ -277,6 +274,6 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid) if (EVFLAG) ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, - vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid); + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); } } diff --git a/src/USER-OMP/dihedral_helix_omp.h b/src/USER-OMP/dihedral_helix_omp.h index 7923197413..e932045cff 100644 --- a/src/USER-OMP/dihedral_helix_omp.h +++ b/src/USER-OMP/dihedral_helix_omp.h @@ -33,13 +33,13 @@ class DihedralHelixOMP : public DihedralHelix, public ThrOMP { public: DihedralHelixOMP(class LAMMPS *lmp) : - DihedralHelix(lmp), ThrOMP(lmp,DIHEDRAL) {}; + DihedralHelix(lmp), ThrOMP(lmp,THR_DIHEDRAL) {}; virtual void compute(int, int); private: template <int EVFLAG, int EFLAG, int NEWTON_BOND> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/dihedral_multi_harmonic_omp.cpp b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp index bde958984e..822ddb7965 100644 --- a/src/USER-OMP/dihedral_multi_harmonic_omp.cpp +++ b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp @@ -39,7 +39,6 @@ void DihedralMultiHarmonicOMP::compute(int eflag, int vflag) if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = 0; const int nall = atom->nlocal + atom->nghost; @@ -47,37 +46,34 @@ void DihedralMultiHarmonicOMP::compute(int 
eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); } template <int EVFLAG, int EFLAG, int NEWTON_BOND> -void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid) +void DihedralMultiHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr) { int i1,i2,i3,i4,n,type; @@ -91,9 +87,10 @@ void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid) edihedral = 0.0; - double **x = atom->x; - int **dihedrallist = neighbor->dihedrallist; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; for (n = nfrom; n < nto; n++) { i1 = dihedrallist[n][0]; @@ -180,7 +177,7 @@ void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid) if (screen) { char str[128]; sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", - me,tid,update->ntimestep, + me,thr->get_tid(),update->ntimestep, atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); error->warning(FLERR,str,0); fprintf(screen," 1st atom: %d %g %g %g\n", @@ -264,6 +261,6 @@ void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid) if (EVFLAG) ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, - vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid); + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); } } diff --git a/src/USER-OMP/dihedral_multi_harmonic_omp.h b/src/USER-OMP/dihedral_multi_harmonic_omp.h index da2322f038..628ad2a6a0 100644 --- a/src/USER-OMP/dihedral_multi_harmonic_omp.h +++ b/src/USER-OMP/dihedral_multi_harmonic_omp.h @@ -33,13 +33,13 @@ class DihedralMultiHarmonicOMP : public DihedralMultiHarmonic, public ThrOMP { public: DihedralMultiHarmonicOMP(class LAMMPS *lmp) : - DihedralMultiHarmonic(lmp), ThrOMP(lmp,DIHEDRAL) {}; + DihedralMultiHarmonic(lmp), ThrOMP(lmp,THR_DIHEDRAL) {}; virtual void compute(int, int); private: template <int EVFLAG, int EFLAG, int 
NEWTON_BOND> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/dihedral_opls_omp.cpp b/src/USER-OMP/dihedral_opls_omp.cpp index 9f59e26d26..6e46575f3b 100644 --- a/src/USER-OMP/dihedral_opls_omp.cpp +++ b/src/USER-OMP/dihedral_opls_omp.cpp @@ -40,7 +40,6 @@ void DihedralOPLSOMP::compute(int eflag, int vflag) if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = 0; const int nall = atom->nlocal + atom->nghost; @@ -48,37 +47,34 @@ void DihedralOPLSOMP::compute(int eflag, int vflag) const int inum = neighbor->ndihedrallist; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); } template <int EVFLAG, int EFLAG, int NEWTON_BOND> -void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid) +void DihedralOPLSOMP::eval(int nfrom, int nto, ThrData * const thr) { int i1,i2,i3,i4,n,type; @@ -92,9 +88,10 @@ void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid) edihedral = 0.0; - double **x = atom->x; - int **dihedrallist = neighbor->dihedrallist; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; for (n = nfrom; n < nto; n++) { i1 = dihedrallist[n][0]; @@ -188,7 +185,7 @@ void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid) if (screen) { char str[128]; sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", - me,tid,update->ntimestep, + me,thr->get_tid(),update->ntimestep, atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); error->warning(FLERR,str,0); fprintf(screen," 1st atom: %d %g %g %g\n", @@ -201,7 +198,7 @@ void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid) me,x[i4][0],x[i4][1],x[i4][2]); } } - + if (c > 1.0) c = 1.0; if (c < -1.0) c = -1.0; @@ -280,7 +277,7 @@ void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid) if (EVFLAG) ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, - vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid); + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); } } diff --git a/src/USER-OMP/dihedral_opls_omp.h b/src/USER-OMP/dihedral_opls_omp.h index 58b9920538..44c76bb2ad 100644 --- a/src/USER-OMP/dihedral_opls_omp.h +++ b/src/USER-OMP/dihedral_opls_omp.h @@ -33,13 +33,13 @@ class DihedralOPLSOMP : public DihedralOPLS, public ThrOMP { public: DihedralOPLSOMP(class LAMMPS *lmp) : - DihedralOPLS(lmp), ThrOMP(lmp,DIHEDRAL) {}; + DihedralOPLS(lmp), ThrOMP(lmp,THR_DIHEDRAL) {}; virtual void compute(int, int); private: 
template <int EVFLAG, int EFLAG, int NEWTON_BOND> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/fix_nve_sphere_omp.cpp b/src/USER-OMP/fix_nve_sphere_omp.cpp index a642b21f22..93af055f81 100644 --- a/src/USER-OMP/fix_nve_sphere_omp.cpp +++ b/src/USER-OMP/fix_nve_sphere_omp.cpp @@ -34,26 +34,24 @@ enum{NONE,DIPOLE}; void FixNVESphereOMP::initial_integrate(int vflag) { - double **x = atom->x; - double **v = atom->v; - double **f = atom->f; - double **omega = atom->omega; - double **torque = atom->torque; - double *radius = atom->radius; - double *rmass = atom->rmass; - int *mask = atom->mask; - int nlocal = atom->nlocal; + double * const * const x = atom->x; + double * const * const v = atom->v; + const double * const * const f = atom->f; + double * const * const omega = atom->omega; + const double * const * const torque = atom->torque; + const double * const radius = atom->radius; + const double * const rmass = atom->rmass; + const int * const mask = atom->mask; + const int nlocal = (igroup == atom->firstgroup) ? 
atom->nfirst : atom->nlocal; int i; - if (igroup == atom->firstgroup) nlocal = atom->nfirst; - // set timestep here since dt may have changed or come via rRESPA const double dtfrotate = dtf / INERTIA; // update v,x,omega for all particles // d_omega/dt = torque / inertia #if defined(_OPENMP) -#pragma omp parallel for private(i) default(shared) +#pragma omp parallel for private(i) default(none) #endif for (i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { @@ -77,9 +75,9 @@ void FixNVESphereOMP::initial_integrate(int vflag) // renormalize mu to dipole length if (extra == DIPOLE) { - double **mu = atom->mu; + double * const * const mu = atom->mu; #if defined(_OPENMP) -#pragma omp parallel for private(i) default(shared) +#pragma omp parallel for private(i) default(none) #endif for (i = 0; i < nlocal; i++) { double g0,g1,g2,msq,scale; @@ -103,18 +101,16 @@ void FixNVESphereOMP::initial_integrate(int vflag) void FixNVESphereOMP::final_integrate() { - double **v = atom->v; - double **f = atom->f; - double **omega = atom->omega; - double **torque = atom->torque; - double *rmass = atom->rmass; - double *radius = atom->radius; - int *mask = atom->mask; - int nlocal = atom->nlocal; + double * const * const v = atom->v; + const double * const * const f = atom->f; + double * const * const omega = atom->omega; + const double * const * const torque = atom->torque; + const double * const rmass = atom->rmass; + const double * const radius = atom->radius; + const int * const mask = atom->mask; + const int nlocal = (igroup == atom->firstgroup) ? 
atom->nfirst : atom->nlocal; int i; - if (igroup == atom->firstgroup) nlocal = atom->nfirst; - // set timestep here since dt may have changed or come via rRESPA const double dtfrotate = dtf / INERTIA; @@ -123,7 +119,7 @@ void FixNVESphereOMP::final_integrate() // d_omega/dt = torque / inertia #if defined(_OPENMP) -#pragma omp parallel for private(i) default(shared) +#pragma omp parallel for private(i) default(none) #endif for (i = 0; i < nlocal; i++) if (mask[i] & groupbit) { diff --git a/src/USER-OMP/fix_shear_history_omp.cpp b/src/USER-OMP/fix_shear_history_omp.cpp index 40781cb407..4655dd1af7 100644 --- a/src/USER-OMP/fix_shear_history_omp.cpp +++ b/src/USER-OMP/fix_shear_history_omp.cpp @@ -47,7 +47,7 @@ void FixShearHistoryOMP::pre_exchange() int flag = 0; #if defined(_OPENMP) -#pragma omp parallel shared(flag) +#pragma omp parallel default(none) shared(flag) #endif { diff --git a/src/USER-OMP/pair_adp_omp.cpp b/src/USER-OMP/pair_adp_omp.cpp index e91642e6ba..3af4a2f7cd 100644 --- a/src/USER-OMP/pair_adp_omp.cpp +++ b/src/USER-OMP/pair_adp_omp.cpp @@ -28,7 +28,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairADPOMP::PairADPOMP(LAMMPS *lmp) : - PairADP(lmp), ThrOMP(lmp, PAIR) + PairADP(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -39,10 +39,10 @@ void PairADPOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; - const int nall = atom->nlocal + atom->nghost; + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; @@ -62,48 +62,39 @@ void PairADPOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, *rho_t, **mu_t, **lambda_t; - f = loop_setup_thr(atom->f, 
ifrom, ito, tid, inum, nall, nthreads); - if (force->newton_pair) { - rho_t = rho + tid*nall; - mu_t = mu + tid*nall; - lambda_t = lambda + tid*nall; - } else { - rho_t = rho + tid*atom->nlocal; - mu_t = mu + tid*atom->nlocal; - lambda_t = lambda + tid*atom->nlocal; - } + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (force->newton_pair) + thr->init_adp(nall, rho, mu, lambda); + else + thr->init_adp(nlocal, rho, mu, lambda); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); - else eval<1,1,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); - else eval<1,0,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); - else eval<0,0,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairADPOMP::eval(double **f, double *rho_t, double **mu_t, - double **lambda_t, int iifrom, int iito, int tid) +void PairADPOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,m,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -117,7 +108,13 @@ void PairADPOMP::eval(double **f, double *rho_t, double **mu_t, evdwl = 0.0; - double **x = atom->x; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const rho_t = thr->get_rho(); + double * const * const mu_t = thr->get_mu(); + double * const * const lambda_t = thr->get_lambda(); + const int tid = thr->get_tid(); + int *type = atom->type; int nlocal = atom->nlocal; int nall = nlocal + atom->nghost; @@ -128,18 +125,6 @@ void PairADPOMP::eval(double **f, double *rho_t, double **mu_t, numneigh = list->numneigh; firstneigh = list->firstneigh; - // zero out density - - if (NEWTON_PAIR) { - memset(rho_t, 0, nall*sizeof(double)); - memset(&(mu_t[0][0]), 0, 3*nall*sizeof(double)); - memset(&(lambda_t[0][0]), 0, 6*nall*sizeof(double)); - } else { - memset(rho_t, 0, nlocal*sizeof(double)); - memset(&(mu_t[0][0]), 0, 3*nlocal*sizeof(double)); - memset(&(lambda_t[0][0]), 0, 6*nlocal*sizeof(double)); - } - // rho = density at each atom // loop over neighbors of my atoms @@ -259,8 +244,7 @@ void PairADPOMP::eval(double **f, double *rho_t, double **mu_t, lambda[i][4]+lambda[i][5]*lambda[i][5]); phi -= 1.0/6.0*(lambda[i][0]+lambda[i][1]+lambda[i][2])* (lambda[i][0]+lambda[i][1]+lambda[i][2]); - if (eflag_global) eng_vdwl_thr[tid] += phi; - if (eflag_atom) eatom_thr[tid][i] += phi; + e_tally_thr(this,i,i,nlocal,/* newton_pair */ 1, phi, 0.0, thr); } } @@ -384,7 +368,7 @@ void PairADPOMP::eval(double **f, double *rho_t, double **mu_t, if (EFLAG) evdwl = phi; if (EVFLAG) 
ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0, - fx,fy,fz,delx,dely,delz,tid); + fx,fy,fz,delx,dely,delz,thr); } } f[i][0] += fxtmp; @@ -399,6 +383,6 @@ double PairADPOMP::memory_usage() { double bytes = memory_usage_thr(); bytes += PairADP::memory_usage(); - + bytes += (comm->nthreads-1) * nmax * (10*sizeof(double) + 3*sizeof(double *)); return bytes; } diff --git a/src/USER-OMP/pair_adp_omp.h b/src/USER-OMP/pair_adp_omp.h index f7d2509cd3..9a7f4023fb 100644 --- a/src/USER-OMP/pair_adp_omp.h +++ b/src/USER-OMP/pair_adp_omp.h @@ -39,8 +39,7 @@ class PairADPOMP : public PairADP, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, double *rho_t, double **mu_t, double **lambda_t, - int iifrom, int iito, int tid); + void eval(int iifrom, int iito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_born_coul_long_omp.cpp b/src/USER-OMP/pair_born_coul_long_omp.cpp index c277a080c0..cf409f3cfc 100644 --- a/src/USER-OMP/pair_born_coul_long_omp.cpp +++ b/src/USER-OMP/pair_born_coul_long_omp.cpp @@ -33,7 +33,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairBornCoulLongOMP::PairBornCoulLongOMP(LAMMPS *lmp) : - PairBornCoulLong(lmp), ThrOMP(lmp, PAIR) + PairBornCoulLong(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -44,7 +44,6 @@ void PairBornCoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -52,40 +51,36 @@ void PairBornCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + 
ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairBornCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) +void PairBornCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -95,9 +90,10 @@ void PairBornCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; int nlocal = atom->nlocal; double *special_coul = force->special_coul; double *special_lj = force->special_lj; @@ -179,7 +175,7 @@ void PairBornCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) } else evdwl = 0.0; if (EVFLAG) ev_tally_thr(this, 
i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_born_coul_long_omp.h b/src/USER-OMP/pair_born_coul_long_omp.h index d6ccbfc680..3271c566a4 100644 --- a/src/USER-OMP/pair_born_coul_long_omp.h +++ b/src/USER-OMP/pair_born_coul_long_omp.h @@ -39,7 +39,7 @@ class PairBornCoulLongOMP : public PairBornCoulLong, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_born_omp.cpp b/src/USER-OMP/pair_born_omp.cpp index c39d205c97..d9dbf0d29e 100644 --- a/src/USER-OMP/pair_born_omp.cpp +++ b/src/USER-OMP/pair_born_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairBornOMP::PairBornOMP(LAMMPS *lmp) : - PairBorn(lmp), ThrOMP(lmp, PAIR) + PairBorn(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairBornOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairBornOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) 
eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairBornOMP::eval(double **f, int iifrom, int iito, int tid) +void PairBornOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -84,7 +79,8 @@ void PairBornOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; @@ -143,7 +139,7 @@ void PairBornOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_born_omp.h b/src/USER-OMP/pair_born_omp.h index b24de4a577..7260644728 100644 --- a/src/USER-OMP/pair_born_omp.h +++ b/src/USER-OMP/pair_born_omp.h @@ -39,7 +39,7 @@ class PairBornOMP : public PairBorn, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_buck_coul_cut_omp.cpp 
b/src/USER-OMP/pair_buck_coul_cut_omp.cpp index ac47d478a0..235f1c4f2c 100644 --- a/src/USER-OMP/pair_buck_coul_cut_omp.cpp +++ b/src/USER-OMP/pair_buck_coul_cut_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairBuckCoulCutOMP::PairBuckCoulCutOMP(LAMMPS *lmp) : - PairBuckCoulCut(lmp), ThrOMP(lmp, PAIR) + PairBuckCoulCut(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairBuckCoulCutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,35 @@ void PairBuckCoulCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairBuckCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairBuckCoulCutOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -86,8 +80,9 @@ void PairBuckCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; double *special_coul = force->special_coul; @@ -162,7 +157,7 @@ void PairBuckCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) } else evdwl = 0.0; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_buck_coul_cut_omp.h b/src/USER-OMP/pair_buck_coul_cut_omp.h index a77f3bad24..8fee0808c0 100644 --- a/src/USER-OMP/pair_buck_coul_cut_omp.h +++ b/src/USER-OMP/pair_buck_coul_cut_omp.h @@ -39,7 +39,7 @@ class PairBuckCoulCutOMP : public PairBuckCoulCut, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_buck_coul_long_omp.cpp b/src/USER-OMP/pair_buck_coul_long_omp.cpp index 6e7398ca44..083b9acc6e 100644 --- a/src/USER-OMP/pair_buck_coul_long_omp.cpp +++ b/src/USER-OMP/pair_buck_coul_long_omp.cpp @@ -33,7 
+33,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairBuckCoulLongOMP::PairBuckCoulLongOMP(LAMMPS *lmp) : - PairBuckCoulLong(lmp), ThrOMP(lmp, PAIR) + PairBuckCoulLong(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -44,7 +44,6 @@ void PairBuckCoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -52,40 +51,37 @@ void PairBuckCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairBuckCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) +void PairBuckCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -95,8 +91,9 @@ void PairBuckCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; double *special_coul = force->special_coul; @@ -178,7 +175,7 @@ void PairBuckCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) } else evdwl = 0.0; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_buck_coul_long_omp.h b/src/USER-OMP/pair_buck_coul_long_omp.h index 2c87904de8..a47e809eec 100644 --- a/src/USER-OMP/pair_buck_coul_long_omp.h +++ b/src/USER-OMP/pair_buck_coul_long_omp.h @@ -39,7 +39,7 @@ class PairBuckCoulLongOMP : public PairBuckCoulLong, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_buck_coul_omp.cpp b/src/USER-OMP/pair_buck_coul_omp.cpp index bd171f628a..97299feeeb 100644 --- a/src/USER-OMP/pair_buck_coul_omp.cpp +++ b/src/USER-OMP/pair_buck_coul_omp.cpp @@ -34,7 +34,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairBuckCoulOMP::PairBuckCoulOMP(LAMMPS *lmp) : - PairBuckCoul(lmp), ThrOMP(lmp, PAIR) + 
PairBuckCoul(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -45,7 +45,6 @@ void PairBuckCoulOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -53,53 +52,50 @@ void PairBuckCoulOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairBuckCoulOMP::eval(double **f, int iifrom, int iito, int tid) +void PairBuckCoulOMP::eval(int iifrom, int iito, ThrData * const thr) { double evdwl,ecoul,fpair; evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; double *special_coul = force->special_coul; double *special_lj = force->special_lj; double qqrd2e = force->qqrd2e; - double *x0 = x[0]; + const double *x0 = x[0]; double *f0 = f[0], *fi = f0; int *ilist = list->ilist; @@ -129,7 +125,7 @@ void PairBuckCoulOMP::eval(double **f, int iifrom, int iito, int tid) ni = sbmask(j); j &= NEIGHMASK; - { register double *xj = x0+(j+(j<<1)); + { const register double *xj = x0+(j+(j<<1)); d[0] = xi[0] - xj[0]; // pair vector d[1] = xi[1] - xj[1]; d[2] = xi[2] - xj[2]; } @@ -214,7 +210,7 @@ void PairBuckCoulOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,d[0],d[1],d[2],tid); + evdwl,ecoul,fpair,d[0],d[1],d[2],thr); } } } diff --git a/src/USER-OMP/pair_buck_coul_omp.h b/src/USER-OMP/pair_buck_coul_omp.h index dbff9b419a..823f64a4ab 100644 --- a/src/USER-OMP/pair_buck_coul_omp.h +++ b/src/USER-OMP/pair_buck_coul_omp.h @@ -39,7 +39,7 @@ class PairBuckCoulOMP : public PairBuckCoul, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_buck_omp.cpp b/src/USER-OMP/pair_buck_omp.cpp index 66d8730abd..5806a3e796 100644 --- a/src/USER-OMP/pair_buck_omp.cpp +++ 
b/src/USER-OMP/pair_buck_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairBuckOMP::PairBuckOMP(LAMMPS *lmp) : - PairBuck(lmp), ThrOMP(lmp, PAIR) + PairBuck(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairBuckOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairBuckOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairBuckOMP::eval(double **f, int iifrom, int iito, int tid) +void PairBuckOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -84,7 +79,8 @@ void PairBuckOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; @@ -145,7 +141,7 @@ void PairBuckOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_buck_omp.h b/src/USER-OMP/pair_buck_omp.h index 40b6702e6f..c73e3f0d08 100644 --- a/src/USER-OMP/pair_buck_omp.h +++ b/src/USER-OMP/pair_buck_omp.h @@ -39,7 +39,7 @@ class PairBuckOMP : public PairBuck, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_cdeam_omp.cpp b/src/USER-OMP/pair_cdeam_omp.cpp index 01bd5f6eaa..287b39ceb1 100644 --- a/src/USER-OMP/pair_cdeam_omp.cpp +++ b/src/USER-OMP/pair_cdeam_omp.cpp @@ -44,7 +44,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairCDEAMOMP::PairCDEAMOMP(LAMMPS *lmp, int _cdeamVersion) : - PairCDEAM(lmp,_cdeamVersion), PairEAM(lmp), ThrOMP(lmp, PAIR) + PairEAM(lmp), PairCDEAM(lmp,_cdeamVersion), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -55,7 +55,6 @@ void PairCDEAMOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = 
vflag_fdotr = eflag_global = eflag_atom = 0; const int nall = atom->nlocal + atom->nghost; @@ -78,22 +77,19 @@ void PairCDEAMOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, *rho_t, *rhoB_t, *D_values_t; - - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - if (force->newton_pair) { - rho_t = rho + tid*nall; - rhoB_t = rhoB + tid*nall; - D_values_t = D_values + tid*nall; - } else { - rho_t = rho + tid*atom->nlocal; - rhoB_t = rhoB + tid*atom->nlocal; - D_values_t = D_values + tid*atom->nlocal; - } + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (force->newton_pair) + thr->init_cdeam(nall, rho, rhoB, D_values); + else + thr->init_cdeam(atom->nlocal, rho, rhoB, D_values); switch (cdeamVersion) { @@ -101,15 +97,15 @@ void PairCDEAMOMP::compute(int eflag, int vflag) if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); - else eval<1,1,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1,1>(ifrom, ito, thr); + else eval<1,1,0,1>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); - else eval<1,0,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1,1>(ifrom, ito, thr); + else eval<1,0,0,1>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); - else eval<0,0,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1,1>(ifrom, ito, thr); + else eval<0,0,0,1>(ifrom, ito, thr); } break; @@ -117,15 +113,15 @@ void PairCDEAMOMP::compute(int eflag, int vflag) if (evflag) { if (eflag) { - if (force->newton_pair) 
eval<1,1,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); - else eval<1,1,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1,2>(ifrom, ito, thr); + else eval<1,1,0,2>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); - else eval<1,0,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1,2>(ifrom, ito, thr); + else eval<1,0,0,2>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); - else eval<0,0,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1,2>(ifrom, ito, thr); + else eval<0,0,0,2>(ifrom, ito, thr); } break; @@ -136,18 +132,12 @@ void PairCDEAMOMP::compute(int eflag, int vflag) error->all(FLERR,"unsupported eam/cd pair style variant"); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR, int CDEAMVERSION> -void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, - double *D_values_t, int iifrom, int iito, int tid) +void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -156,10 +146,17 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - int nall = nlocal + atom->nghost; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const rho_t = thr->get_rho(); + double * const rhoB_t = thr->get_rhoB(); + double * const D_values_t = thr->get_D_values(); + const int tid = thr->get_tid(); + const int nthreads = comm->nthreads; + + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; double fxtmp,fytmp,fztmp; @@ -167,18 +164,6 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, numneigh = list->numneigh; firstneigh = list->firstneigh; - // zero out density - - if (NEWTON_PAIR) { - memset(rho_t, 0, nall*sizeof(double)); - memset(rhoB_t, 0, nall*sizeof(double)); - memset(D_values_t, 0, nall*sizeof(double)); - } else { - memset(rho_t, 0, nlocal*sizeof(double)); - memset(rhoB_t, 0, nlocal*sizeof(double)); - memset(D_values_t, 0, nlocal*sizeof(double)); - } - // Stage I // Compute rho and rhoB at each local atom site. 
@@ -240,10 +225,10 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, if (NEWTON_PAIR) { // reduce per thread density - data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid); - data_reduce_thr(&(rhoB[0]), nall, comm->nthreads, 1, tid); + data_reduce_thr(rho, nall, nthreads, 1, tid); + data_reduce_thr(rhoB, nall, nthreads, 1, tid); if (CDEAMVERSION==1) - data_reduce_thr(&(D_values[0]), nall, comm->nthreads, 1, tid); + data_reduce_thr(D_values, nall, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -259,10 +244,10 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, } else { // reduce per thread density - data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid); - data_reduce_thr(&(rhoB[0]), nlocal, comm->nthreads, 1, tid); + data_reduce_thr(rho, nlocal, nthreads, 1, tid); + data_reduce_thr(rhoB, nlocal, nthreads, 1, tid); if (CDEAMVERSION==1) - data_reduce_thr(&(D_values[0]), nlocal, comm->nthreads, 1, tid); + data_reduce_thr(D_values, nlocal, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -277,8 +262,7 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, fp[i] = FPrimeOfRho(index, type[i]); if(EFLAG) { phi = FofRho(index, type[i]); - if (eflag_global) eng_vdwl_thr[tid] += phi; - if (eflag_atom) eatom_thr[tid][i] += phi; + e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr); } } @@ -360,7 +344,7 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, } if (NEWTON_PAIR) { - data_reduce_thr(&(D_values[0]), nall, comm->nthreads, 1, tid); + data_reduce_thr(D_values, nall, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -375,7 +359,7 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, sync_threads(); } else { - data_reduce_thr(&(D_values[0]), nlocal, comm->nthreads, 1, tid); + data_reduce_thr(D_values, nlocal, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -525,7 +509,7 
@@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, if(EFLAG) evdwl = phi; if(EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0, - fpair,delx,dely,delz,tid); + fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_cdeam_omp.h b/src/USER-OMP/pair_cdeam_omp.h index 85b124cb17..46f460f8fa 100644 --- a/src/USER-OMP/pair_cdeam_omp.h +++ b/src/USER-OMP/pair_cdeam_omp.h @@ -40,8 +40,7 @@ class PairCDEAMOMP : public PairCDEAM, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR, int CDEAMVERSION> - void eval(double **f, double *rho_t, double *rhoB_t, double *D_values_t, - int iifrom, int iito, int tid); + void eval(int iifrom, int iito, ThrData * const thr); }; /// The one-site concentration formulation of CD-EAM. diff --git a/src/USER-OMP/pair_colloid_omp.cpp b/src/USER-OMP/pair_colloid_omp.cpp index c8bc74407a..7bfe1c04de 100644 --- a/src/USER-OMP/pair_colloid_omp.cpp +++ b/src/USER-OMP/pair_colloid_omp.cpp @@ -26,7 +26,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairColloidOMP::PairColloidOMP(LAMMPS *lmp) : - PairColloid(lmp), ThrOMP(lmp, PAIR) + PairColloid(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -37,7 +37,6 @@ void PairColloidOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -45,38 +44,34 @@ void PairColloidOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if 
(force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairColloidOMP::eval(double **f, int iifrom, int iito, int tid) +void PairColloidOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -86,10 +81,11 @@ void PairColloidOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -204,7 +200,7 @@ void PairColloidOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } f[i][0] += fxtmp; f[i][1] += fytmp; diff --git a/src/USER-OMP/pair_colloid_omp.h 
b/src/USER-OMP/pair_colloid_omp.h index a0be13cbb4..cde7e9b650 100644 --- a/src/USER-OMP/pair_colloid_omp.h +++ b/src/USER-OMP/pair_colloid_omp.h @@ -39,7 +39,7 @@ class PairColloidOMP : public PairColloid, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_coul_cut_omp.cpp b/src/USER-OMP/pair_coul_cut_omp.cpp index bb19db3d22..a8473eec38 100644 --- a/src/USER-OMP/pair_coul_cut_omp.cpp +++ b/src/USER-OMP/pair_coul_cut_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairCoulCutOMP::PairCoulCutOMP(LAMMPS *lmp) : - PairCoulCut(lmp), ThrOMP(lmp, PAIR) + PairCoulCut(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairCoulCutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,36 @@ void PairCoulCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if 
(force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairCoulCutOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; @@ -86,12 +81,13 @@ void PairCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -142,7 +138,7 @@ void PairCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) ecoul = factor_coul * qqrd2e * scale[itype][jtype] * qtmp*q[j]*rinv; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - 0.0,ecoul,fpair,delx,dely,delz,tid); + 0.0,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_coul_cut_omp.h b/src/USER-OMP/pair_coul_cut_omp.h index eca9958ff2..3499ee4ae6 100644 --- a/src/USER-OMP/pair_coul_cut_omp.h +++ 
b/src/USER-OMP/pair_coul_cut_omp.h @@ -39,7 +39,7 @@ class PairCoulCutOMP : public PairCoulCut, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_coul_debye_omp.cpp b/src/USER-OMP/pair_coul_debye_omp.cpp index 1c2e7b8e07..73e579262e 100644 --- a/src/USER-OMP/pair_coul_debye_omp.cpp +++ b/src/USER-OMP/pair_coul_debye_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairCoulDebyeOMP::PairCoulDebyeOMP(LAMMPS *lmp) : - PairCoulDebye(lmp), ThrOMP(lmp, PAIR) + PairCoulDebye(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairCoulDebyeOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,36 @@ void PairCoulDebyeOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if 
(force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid) +void PairCoulDebyeOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; @@ -86,12 +81,13 @@ void PairCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid) ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -144,7 +140,7 @@ void PairCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid) ecoul = factor_coul * qqrd2e * qtmp*q[j] * rinv * screening; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - 0.0,ecoul,fpair,delx,dely,delz,tid); + 0.0,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_coul_debye_omp.h b/src/USER-OMP/pair_coul_debye_omp.h index 7ad599bb1b..f016de8b5d 100644 --- a/src/USER-OMP/pair_coul_debye_omp.h +++ b/src/USER-OMP/pair_coul_debye_omp.h @@ -39,7 +39,7 @@ class PairCoulDebyeOMP : public PairCoulDebye, public ThrOMP { 
private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_coul_long_omp.cpp b/src/USER-OMP/pair_coul_long_omp.cpp index 3a2e051591..82f070d37d 100644 --- a/src/USER-OMP/pair_coul_long_omp.cpp +++ b/src/USER-OMP/pair_coul_long_omp.cpp @@ -33,7 +33,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairCoulLongOMP::PairCoulLongOMP(LAMMPS *lmp) : - PairCoulLong(lmp), ThrOMP(lmp, PAIR) + PairCoulLong(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -44,7 +44,6 @@ void PairCoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -52,40 +51,36 @@ void PairCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces 
into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) +void PairCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itable,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; @@ -96,12 +91,13 @@ void PairCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -181,7 +177,7 @@ void PairCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - 0.0,ecoul,fpair,delx,dely,delz,tid); + 0.0,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_coul_long_omp.h b/src/USER-OMP/pair_coul_long_omp.h index 7b63f762f2..d7655637d0 100644 --- a/src/USER-OMP/pair_coul_long_omp.h +++ b/src/USER-OMP/pair_coul_long_omp.h @@ -39,7 +39,7 @@ class PairCoulLongOMP : public PairCoulLong, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git 
a/src/USER-OMP/pair_dipole_cut_omp.cpp b/src/USER-OMP/pair_dipole_cut_omp.cpp index 9ba93b19b5..85079dd718 100644 --- a/src/USER-OMP/pair_dipole_cut_omp.cpp +++ b/src/USER-OMP/pair_dipole_cut_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairDipoleCutOMP::PairDipoleCutOMP(LAMMPS *lmp) : - PairDipoleCut(lmp), ThrOMP(lmp, PAIR) + PairDipoleCut(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairDipoleCutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,34 @@ void PairDipoleCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, **torque; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - torque = atom->torque + tid*nall; + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid); - else eval<1,1,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid); - else eval<1,0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid); - else eval<0,0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces and torques into global arrays. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); - data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairDipoleCutOMP::eval(double **f, double **torque, int iifrom, int iito, int tid) +void PairDipoleCutOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul; @@ -90,14 +83,16 @@ void PairDipoleCutOMP::eval(double **f, double **torque, int iifrom, int iito, i evdwl = 0.0; - double **x = atom->x; - double *q = atom->q; - double **mu = atom->mu; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const double * const q = atom->q; + const double * const * const mu = atom->mu; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp; ilist = list->ilist; @@ -265,7 +260,7 @@ void PairDipoleCutOMP::eval(double **f, double **torque, int iifrom, int iito, i } if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fx,fy,fz,delx,dely,delz,tid); + evdwl,ecoul,fx,fy,fz,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_dipole_cut_omp.h b/src/USER-OMP/pair_dipole_cut_omp.h index 832bd4d3be..b175450c9f 100644 --- a/src/USER-OMP/pair_dipole_cut_omp.h +++ b/src/USER-OMP/pair_dipole_cut_omp.h @@ -39,7 
+39,7 @@ class PairDipoleCutOMP : public PairDipoleCut, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, double **torque, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_dipole_sf_omp.cpp b/src/USER-OMP/pair_dipole_sf_omp.cpp index 9ebc72d414..b920ff5c83 100644 --- a/src/USER-OMP/pair_dipole_sf_omp.cpp +++ b/src/USER-OMP/pair_dipole_sf_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairDipoleSFOMP::PairDipoleSFOMP(LAMMPS *lmp) : - PairDipoleSF(lmp), ThrOMP(lmp, PAIR) + PairDipoleSF(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairDipoleSFOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,34 @@ void PairDipoleSFOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, **torque; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - torque = atom->torque + tid*nall; + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid); - else eval<1,1,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid); - else eval<1,0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, 
ito, tid); - else eval<0,0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces and torques into global arrays. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); - data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairDipoleSFOMP::eval(double **f, double **torque, int iifrom, int iito, int tid) +void PairDipoleSFOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul; @@ -94,14 +87,16 @@ void PairDipoleSFOMP::eval(double **f, double **torque, int iifrom, int iito, in evdwl = 0.0; - double **x = atom->x; - double *q = atom->q; - double **mu = atom->mu; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const double * const q = atom->q; + const double * const * const mu = atom->mu; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp; ilist = list->ilist; @@ -297,7 +292,7 @@ void PairDipoleSFOMP::eval(double **f, double **torque, int iifrom, int iito, in } if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fx,fy,fz,delx,dely,delz,tid); + evdwl,ecoul,fx,fy,fz,delx,dely,delz,thr); } } f[i][0] += fxtmp; 
diff --git a/src/USER-OMP/pair_dipole_sf_omp.h b/src/USER-OMP/pair_dipole_sf_omp.h index e601e2d569..89c80fa788 100644 --- a/src/USER-OMP/pair_dipole_sf_omp.h +++ b/src/USER-OMP/pair_dipole_sf_omp.h @@ -39,7 +39,7 @@ class PairDipoleSFOMP : public PairDipoleSF, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, double **torque, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_dpd_omp.cpp b/src/USER-OMP/pair_dpd_omp.cpp index be1e32f37d..0d24ce401d 100644 --- a/src/USER-OMP/pair_dpd_omp.cpp +++ b/src/USER-OMP/pair_dpd_omp.cpp @@ -29,7 +29,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairDPDOMP::PairDPDOMP(LAMMPS *lmp) : - PairDPD(lmp), ThrOMP(lmp, PAIR) + PairDPD(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; random_thr = NULL; @@ -54,7 +54,6 @@ void PairDPDOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -63,46 +62,46 @@ void PairDPDOMP::compute(int eflag, int vflag) if (!random_thr) random_thr = new RanMars*[nthreads]; - + + // to ensure full compatibility with the serial DPD style + // we use is random number generator instance for thread 0 random_thr[0] = random; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + // generate a random number generator instance for + // all threads != 0. make sure we use unique seeds. 
if (random_thr && tid > 0) random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me + comm->nprocs*tid); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairDPDOMP::eval(double **f, int iifrom, int iito, int tid) +void PairDPDOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -112,14 +111,15 @@ void PairDPDOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - double **v = atom->v; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; - double dtinvsqrt = 1.0/sqrt(update->dt); + const double * const * const x = atom->x; + const double * const * const v = atom->v; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double *special_lj = force->special_lj; + const double dtinvsqrt = 1.0/sqrt(update->dt); double fxtmp,fytmp,fztmp; - RanMars &rng = *random_thr[tid]; + RanMars &rng = 
*random_thr[thr->get_tid()]; ilist = list->ilist; numneigh = list->numneigh; @@ -190,7 +190,7 @@ void PairDPDOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_dpd_omp.h b/src/USER-OMP/pair_dpd_omp.h index 9385e5444f..c3802f8e60 100644 --- a/src/USER-OMP/pair_dpd_omp.h +++ b/src/USER-OMP/pair_dpd_omp.h @@ -43,7 +43,7 @@ class PairDPDOMP : public PairDPD, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_dpd_tstat_omp.cpp b/src/USER-OMP/pair_dpd_tstat_omp.cpp index 7e3fb8b398..50a1bf439e 100644 --- a/src/USER-OMP/pair_dpd_tstat_omp.cpp +++ b/src/USER-OMP/pair_dpd_tstat_omp.cpp @@ -29,7 +29,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairDPDTstatOMP::PairDPDTstatOMP(LAMMPS *lmp) : - PairDPDTstat(lmp), ThrOMP(lmp, PAIR) + PairDPDTstat(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; random_thr = NULL; @@ -54,7 +54,6 @@ void PairDPDTstatOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -64,45 +63,45 @@ void PairDPDTstatOMP::compute(int eflag, int vflag) if (!random_thr) random_thr = new RanMars*[nthreads]; + // to ensure full compatibility with the serial DPD style + // we use is random number generator instance for thread 0 random_thr[0] = random; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, 
nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + // generate a random number generator instance for + // all threads != 0. make sure we use unique seeds. if (random_thr && tid > 0) random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me + comm->nprocs*tid); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairDPDTstatOMP::eval(double **f, int iifrom, int iito, int tid) +void PairDPDTstatOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -112,14 +111,15 @@ void PairDPDTstatOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - double **v = atom->v; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; - double dtinvsqrt = 1.0/sqrt(update->dt); + const double * const * const x = atom->x; + const double * const * const v = atom->v; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double *special_lj = force->special_lj; + const double dtinvsqrt = 1.0/sqrt(update->dt); double fxtmp,fytmp,fztmp; - RanMars &rng = *random_thr[tid]; + RanMars &rng = *random_thr[thr->get_tid()]; // adjust sigma if target T is changing @@ -192,7 +192,7 @@ void PairDPDTstatOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - 0.0,0.0,fpair,delx,dely,delz,tid); + 0.0,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_dpd_tstat_omp.h b/src/USER-OMP/pair_dpd_tstat_omp.h index 14f640a925..87c9de5505 100644 --- a/src/USER-OMP/pair_dpd_tstat_omp.h +++ b/src/USER-OMP/pair_dpd_tstat_omp.h @@ -43,7 +43,7 @@ class PairDPDTstatOMP : public PairDPDTstat, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_eam_omp.cpp b/src/USER-OMP/pair_eam_omp.cpp index 0ae4d54fb7..c014eb75e2 100644 --- a/src/USER-OMP/pair_eam_omp.cpp +++ b/src/USER-OMP/pair_eam_omp.cpp @@ -28,7 +28,7 @@ using 
namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairEAMOMP::PairEAMOMP(LAMMPS *lmp) : - PairEAM(lmp), ThrOMP(lmp, PAIR) + PairEAM(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -39,7 +39,6 @@ void PairEAMOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; const int nall = atom->nlocal + atom->nghost; @@ -58,42 +57,39 @@ void PairEAMOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, *rho_t; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + if (force->newton_pair) - rho_t = rho + tid*nall; - else rho_t = rho + tid*atom->nlocal; + thr->init_eam(nall, rho); + else + thr->init_eam(atom->nlocal, rho); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, rho_t, ifrom, ito, tid); - else eval<1,1,0>(f, rho_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, rho_t, ifrom, ito, tid); - else eval<1,0,0>(f, rho_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, rho_t, ifrom, ito, tid); - else eval<0,0,0>(f, rho_t, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairEAMOMP::eval(double **f, double *rho_t, - int iifrom, int iito, int tid) +void PairEAMOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,m,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -103,10 +99,15 @@ void PairEAMOMP::eval(double **f, double *rho_t, evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - int nall = nlocal + atom->nghost; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const rho_t = thr->get_rho(); + const int tid = thr->get_tid(); + const int nthreads = comm->nthreads; + + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; double fxtmp,fytmp,fztmp; @@ -114,11 +115,6 @@ void PairEAMOMP::eval(double **f, double *rho_t, numneigh = list->numneigh; firstneigh = list->firstneigh; - // zero out density - - if (NEWTON_PAIR) memset(rho_t, 0, nall*sizeof(double)); - else memset(rho_t, 0, nlocal*sizeof(double)); - // rho = density at each atom // loop over neighbors of my atoms @@ -164,7 +160,7 @@ void PairEAMOMP::eval(double **f, double *rho_t, if (NEWTON_PAIR) { // reduce per thread density - data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid); + data_reduce_thr(rho, nall, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -178,7 +174,7 @@ void PairEAMOMP::eval(double **f, double *rho_t, sync_threads(); } else { - data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid); + data_reduce_thr(rho, nlocal, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -198,8 +194,7 @@ void PairEAMOMP::eval(double **f, double *rho_t, fp[i] = (coeff[0]*p + coeff[1])*p + coeff[2]; if (EFLAG) { phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; - if (eflag_global) eng_vdwl_thr[tid] += 
phi; - if (eflag_atom) eatom_thr[tid][i] += phi; + e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr); } } @@ -283,7 +278,7 @@ void PairEAMOMP::eval(double **f, double *rho_t, if (EFLAG) evdwl = phi; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_eam_omp.h b/src/USER-OMP/pair_eam_omp.h index 1184cb34bc..6b0f1274fc 100644 --- a/src/USER-OMP/pair_eam_omp.h +++ b/src/USER-OMP/pair_eam_omp.h @@ -39,7 +39,7 @@ class PairEAMOMP : public PairEAM, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, double *rho_t, int iifrom, int iito, int tid); + void eval(int iifrom, int iito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_edip_omp.cpp b/src/USER-OMP/pair_edip_omp.cpp index 65b05c8143..f0d6d47cec 100644 --- a/src/USER-OMP/pair_edip_omp.cpp +++ b/src/USER-OMP/pair_edip_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairEDIPOMP::PairEDIPOMP(LAMMPS *lmp) : - PairEDIP(lmp), ThrOMP(lmp, PAIR) + PairEDIP(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairEDIPOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = vflag_atom = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,35 +43,31 @@ void PairEDIPOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (vflag_atom) eval<1,1,1>(f, 
ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (vflag_atom) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (vflag_atom) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (vflag_atom) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } - } else eval<0,0,0>(f, ifrom, ito, tid); + } else eval<0,0,0>(ifrom, ito, thr); - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int VFLAG_ATOM> -void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid) +void PairEDIPOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,k,ii,inum,jnum; int itype,jtype,ktype,ijparam,ikparam,ijkparam; @@ -133,6 +128,8 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid) double potentia3B_factor; double potential2B_factor; + const int tid = thr->get_tid(); + double *pre_thrInvR_ij = preInvR_ij + tid * leadDimInteractionList; double *pre_thrExp3B_ij = preExp3B_ij + tid * leadDimInteractionList; double *pre_thrExp3BDerived_ij = preExp3BDerived_ij + tid * leadDimInteractionList; @@ -141,9 +138,10 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid) double *pre_thrPow2B_ij = prePow2B_ij + tid * leadDimInteractionList; double *pre_thrForceCoord = preForceCoord + tid * leadDimInteractionList; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; inum = list->inum; ilist = list->ilist; @@ -340,7 +338,7 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 
(exp2B_ij * potential2B_factor); if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, evdwl, 0.0, - -forceMod2B*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2],tid); + -forceMod2B*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2],thr); // three-body Forces @@ -435,7 +433,7 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = (exp3B_ij * exp3B_ik * potentia3B_factor); - if (evflag) ev_tally3(i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik); + if (evflag) ev_tally3_thr(this,i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik,thr); } } @@ -469,7 +467,7 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, 0.0, 0.0, - forceModCoord_ij, dr_ij[0], dr_ij[1], dr_ij[2],tid); + forceModCoord_ij, dr_ij[0], dr_ij[1], dr_ij[2],thr); } } } diff --git a/src/USER-OMP/pair_edip_omp.h b/src/USER-OMP/pair_edip_omp.h index 55c34db345..55e10c83bb 100644 --- a/src/USER-OMP/pair_edip_omp.h +++ b/src/USER-OMP/pair_edip_omp.h @@ -34,7 +34,7 @@ class PairEDIPOMP : public PairEDIP, public ThrOMP { private: template <int EVFLAG, int EFLAG, int VFLAG_ATOM> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_eim_omp.cpp b/src/USER-OMP/pair_eim_omp.cpp index d31ad20120..7184adb781 100644 --- a/src/USER-OMP/pair_eim_omp.cpp +++ b/src/USER-OMP/pair_eim_omp.cpp @@ -28,7 +28,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairEIMOMP::PairEIMOMP(LAMMPS *lmp) : - PairEIM(lmp), ThrOMP(lmp, PAIR) + PairEIM(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -39,7 +39,6 @@ void PairEIMOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; const int nall = atom->nlocal + atom->nghost; @@ -58,46 +57,39 @@ void PairEIMOMP::compute(int eflag, int vflag) } #if 
defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, *rho_t, *fp_t; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - if (force->newton_pair) { - rho_t = rho + tid*nall; - fp_t = fp + tid*nall; - } else { - rho_t = rho + tid*atom->nlocal; - fp_t = fp + tid*atom->nlocal; - } + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (force->newton_pair) + thr->init_eim(nall, rho, fp); + else + thr->init_eim(atom->nlocal, rho, fp); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, rho_t, fp_t, ifrom, ito, tid); - else eval<1,1,0>(f, rho_t, fp_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, rho_t, fp_t, ifrom, ito, tid); - else eval<1,0,0>(f, rho_t, fp_t, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, rho_t, fp_t, ifrom, ito, tid); - else eval<0,0,0>(f, rho_t, fp_t, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, - int iifrom, int iito, int tid) +void PairEIMOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,m,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -107,10 +99,17 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - int nall = nlocal + atom->nghost; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const rho_t = thr->get_rho(); + double * const fp_t = thr->get_fp(); + const int tid = thr->get_tid(); + const int nthreads = comm->nthreads; + + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; double fxtmp,fytmp,fztmp; @@ -118,16 +117,6 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, numneigh = list->numneigh; firstneigh = list->firstneigh; - // zero out density and fp - - if (NEWTON_PAIR) { - memset(rho_t, 0, nall*sizeof(double)); - memset(fp_t, 0, nall*sizeof(double)); - } else { - memset(rho_t, 0, nlocal*sizeof(double)); - memset(fp_t, 0, nlocal*sizeof(double)); - } - // rho = density at each atom // loop over neighbors of my atoms @@ -171,7 +160,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, // communicate and sum densities if (NEWTON_PAIR) { // reduce per thread density - data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid); + data_reduce_thr(rho, nall, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -185,7 +174,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, } } else { - data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid); + data_reduce_thr(rho, nlocal, nthreads, 1, tid); // wait until reduction is complete 
sync_threads(); @@ -243,7 +232,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, // communicate and sum modified densities if (NEWTON_PAIR) { // reduce per thread density - data_reduce_thr(&(fp[0]), nall, comm->nthreads, 1, tid); + data_reduce_thr(fp, nall, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -257,7 +246,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, } } else { - data_reduce_thr(&(fp[0]), nlocal, comm->nthreads, 1, tid); + data_reduce_thr(fp, nlocal, nthreads, 1, tid); // wait until reduction is complete sync_threads(); @@ -279,8 +268,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, itype = type[i]; if (EFLAG) { phi = 0.5*rho[i]*fp[i]; - if (eflag_global) eng_vdwl_thr[tid] += phi; - if (eflag_atom) eatom_thr[tid][i] += phi; + e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr); } } @@ -345,7 +333,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, if (EFLAG) evdwl = phi-q0[itype]*q0[jtype]*coul; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_eim_omp.h b/src/USER-OMP/pair_eim_omp.h index 3693492e09..ad273e28eb 100644 --- a/src/USER-OMP/pair_eim_omp.h +++ b/src/USER-OMP/pair_eim_omp.h @@ -39,7 +39,7 @@ class PairEIMOMP : public PairEIM, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, double *rho_t, double *fp_t, int iifrom, int iito, int tid); + void eval(int iifrom, int iito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_gauss_omp.cpp b/src/USER-OMP/pair_gauss_omp.cpp index e8b255d0b7..4f26670715 100644 --- a/src/USER-OMP/pair_gauss_omp.cpp +++ b/src/USER-OMP/pair_gauss_omp.cpp @@ -26,7 +26,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairGaussOMP::PairGaussOMP(LAMMPS *lmp) : - 
PairGauss(lmp), ThrOMP(lmp, PAIR) + PairGauss(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -37,46 +37,44 @@ void PairGaussOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; + double occ = 0.0; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) reduction(+:occ) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) occ = eval<1,1,1>(ifrom, ito, thr); + else occ = eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) occ = eval<1,0,1>(ifrom, ito, thr); + else occ = eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) occ = eval<0,0,1>(ifrom, ito, thr); + else occ = eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); + if (eflag_global) pvector[0] = occ; } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairGaussOMP::eval(double **f, int iifrom, int iito, int tid) +double PairGaussOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -86,10 +84,11 @@ void PairGaussOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -149,14 +148,14 @@ void PairGaussOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } - if (eflag_global) pvector[0] = occ; + return occ; } /* ---------------------------------------------------------------------- */ diff --git a/src/USER-OMP/pair_gauss_omp.h b/src/USER-OMP/pair_gauss_omp.h index 7f8fc9a85b..81d9d0ce3f 100644 --- a/src/USER-OMP/pair_gauss_omp.h +++ b/src/USER-OMP/pair_gauss_omp.h @@ -39,7 +39,7 @@ class PairGaussOMP : public PairGauss, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + double eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_gayberne_omp.cpp b/src/USER-OMP/pair_gayberne_omp.cpp index ff115e8ef7..d8ec6c9b32 100644 --- a/src/USER-OMP/pair_gayberne_omp.cpp +++ b/src/USER-OMP/pair_gayberne_omp.cpp @@ -27,7 +27,7 @@ using namespace LAMMPS_NS; /* 
---------------------------------------------------------------------- */ PairGayBerneOMP::PairGayBerneOMP(LAMMPS *lmp) : - PairGayBerne(lmp), ThrOMP(lmp, PAIR) + PairGayBerne(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -38,7 +38,6 @@ void PairGayBerneOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -46,40 +45,34 @@ void PairGayBerneOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, **torque; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - torque = atom->torque + tid*nall; + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid); - else eval<1,1,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid); - else eval<1,0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid); - else eval<0,0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces and torques into global arrays. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); - data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int tid) +void PairGayBerneOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj; @@ -88,11 +81,13 @@ void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int t int *ilist,*jlist,*numneigh,**firstneigh; double *iquat,*jquat; - double **x = atom->x; - int *ellipsoid = atom->ellipsoid; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const tor = thr->get_torque(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + const int * const ellipsoid = atom->ellipsoid; AtomVecEllipsoid::Bonus *bonus = avec->bonus; @@ -108,6 +103,7 @@ void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int t i = ilist[ii]; itype = type[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; if (form[itype][itype] == ELLIPSE_ELLIPSE) { iquat = bonus[ellipsoid[i]].quat; @@ -187,12 +183,12 @@ void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int t ttor[1] *= factor_lj; ttor[2] *= factor_lj; - f[i][0] += fforce[0]; - f[i][1] += fforce[1]; - f[i][2] += fforce[2]; - tor[i][0] += ttor[0]; - tor[i][1] += ttor[1]; - tor[i][2] += ttor[2]; + fxtmp += fforce[0]; + fytmp += fforce[1]; + fztmp += fforce[2]; + t1tmp += ttor[0]; + t2tmp += ttor[1]; + t3tmp += ttor[2]; if (NEWTON_PAIR || j < nlocal) { rtor[0] *= factor_lj; @@ -210,9 +206,15 @@ void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int t if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, evdwl,0.0,fforce[0],fforce[1],fforce[2], - 
-r12[0],-r12[1],-r12[2],tid); + -r12[0],-r12[1],-r12[2],thr); } } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + tor[i][0] += t1tmp; + tor[i][1] += t2tmp; + tor[i][2] += t3tmp; } } diff --git a/src/USER-OMP/pair_gayberne_omp.h b/src/USER-OMP/pair_gayberne_omp.h index 737b4ec67d..0bd0b8b086 100644 --- a/src/USER-OMP/pair_gayberne_omp.h +++ b/src/USER-OMP/pair_gayberne_omp.h @@ -39,7 +39,7 @@ class PairGayBerneOMP : public PairGayBerne, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, double **torque, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.cpp b/src/USER-OMP/pair_gran_hertz_history_omp.cpp index 1866833afe..23b8b8f5c2 100644 --- a/src/USER-OMP/pair_gran_hertz_history_omp.cpp +++ b/src/USER-OMP/pair_gran_hertz_history_omp.cpp @@ -26,7 +26,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairGranHertzHistoryOMP::PairGranHertzHistoryOMP(LAMMPS *lmp) : - PairGranHertzHistory(lmp), ThrOMP(lmp, PAIR) + PairGranHertzHistory(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -37,7 +37,6 @@ void PairGranHertzHistoryOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int shearupdate = (update->ntimestep > laststep) ? 
1 : 0; @@ -47,35 +46,29 @@ void PairGranHertzHistoryOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, **torque; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - torque = atom->torque + tid*nall; + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) - if (shearupdate) eval<1,1>(f, torque, ifrom, ito, tid); - else eval<1,0>(f, torque, ifrom, ito, tid); + if (shearupdate) eval<1,1>(ifrom, ito, thr); + else eval<1,0>(ifrom, ito, thr); else - if (shearupdate) eval<0,1>(f, torque, ifrom, ito, tid); - else eval<0,0>(f, torque, ifrom, ito, tid); + if (shearupdate) eval<0,1>(ifrom, ito, thr); + else eval<0,0>(ifrom, ito, thr); - // reduce per thread forces and torque into global arrays. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); - data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - laststep = update->ntimestep; } template <int EVFLAG, int SHEARUPDATE> -void PairGranHertzHistoryOMP::eval(double **f, double **torque, int iifrom, int iito, int tid) +void PairGranHertzHistoryOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz; @@ -90,15 +83,17 @@ void PairGranHertzHistoryOMP::eval(double **f, double **torque, int iifrom, int int *touch,**firsttouch; double *shear,*allshear,**firstshear; - double **x = atom->x; - double **v = atom->v; - double **omega = atom->omega; - double *radius = atom->radius; - double *rmass = atom->rmass; - double *mass = atom->mass; - int *type = atom->type; - int *mask = atom->mask; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + const double * const * const v = atom->v; + const double * const * const omega = atom->omega; + const double * const radius = atom->radius; + const double * const rmass = atom->rmass; + const double * const mass = atom->mass; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const int * const type = atom->type; + const int * const mask = atom->mask; + const int nlocal = atom->nlocal; double fxtmp,fytmp,fztmp; double t1tmp,t2tmp,t3tmp; @@ -274,7 +269,7 @@ void PairGranHertzHistoryOMP::eval(double **f, double **torque, int iifrom, int } if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,/* newton_pair */ 0, - 0.0,0.0,fx,fy,fz,delx,dely,delz,tid); + 0.0,0.0,fx,fy,fz,delx,dely,delz,thr); } } diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.h b/src/USER-OMP/pair_gran_hertz_history_omp.h index 66d7bc0fa5..956e057093 100644 --- a/src/USER-OMP/pair_gran_hertz_history_omp.h +++ b/src/USER-OMP/pair_gran_hertz_history_omp.h @@ -39,7 +39,7 @@ class PairGranHertzHistoryOMP : public PairGranHertzHistory, public ThrOMP { private: template <int EVFLAG, int SHEARUPDATE> - void eval(double **f, double **torque, int ifrom, int 
ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.cpp b/src/USER-OMP/pair_gran_hooke_history_omp.cpp index ad0537b516..5212b30ce2 100644 --- a/src/USER-OMP/pair_gran_hooke_history_omp.cpp +++ b/src/USER-OMP/pair_gran_hooke_history_omp.cpp @@ -28,7 +28,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairGranHookeHistoryOMP::PairGranHookeHistoryOMP(LAMMPS *lmp) : - PairGranHookeHistory(lmp), ThrOMP(lmp, PAIR) + PairGranHookeHistory(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; // trigger use of OpenMP version of FixShearHistory @@ -42,7 +42,6 @@ void PairGranHookeHistoryOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int shearupdate = (update->ntimestep > laststep) ? 1 : 0; @@ -52,38 +51,33 @@ void PairGranHookeHistoryOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, **torque; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - torque = atom->torque + tid*nall; + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) - if (shearupdate) eval<1,1>(f, torque, ifrom, ito, tid); - else eval<1,0>(f, torque, ifrom, ito, tid); + if (shearupdate) eval<1,1>(ifrom, ito, thr); + else eval<1,0>(ifrom, ito, thr); else - if (shearupdate) eval<0,1>(f, torque, ifrom, ito, tid); - else eval<0,0>(f, torque, ifrom, ito, tid); + if (shearupdate) eval<0,1>(ifrom, ito, thr); + else eval<0,0>(ifrom, ito, thr); - // reduce per thread forces and torque into global arrays. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); - data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - laststep = update->ntimestep; } template <int EVFLAG, int SHEARUPDATE> -void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int iito, int tid) +void PairGranHookeHistoryOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz; + double myshear[3]; double radi,radj,radsum,rsq,r,rinv,rsqinv; double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; double wr1,wr2,wr3; @@ -95,15 +89,17 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int int *touch,**firsttouch; double *shear,*allshear,**firstshear; - double **x = atom->x; - double **v = atom->v; - double **omega = atom->omega; - double *radius = atom->radius; - double *rmass = atom->rmass; - double *mass = atom->mass; - int *type = atom->type; - int *mask = atom->mask; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + const double * const * const v = atom->v; + const double * const * const omega = atom->omega; + const double * const radius = atom->radius; + const double * const rmass = atom->rmass; + const double * const mass = atom->mass; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const int * const type = atom->type; + const int * const mask = atom->mask; + const int nlocal = atom->nlocal; double fxtmp,fytmp,fztmp; double t1tmp,t2tmp,t3tmp; @@ -144,10 +140,9 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int // unset non-touching neighbors touch[jj] = 0; - shear = &allshear[3*jj]; - shear[0] = 0.0; - shear[1] = 0.0; - shear[2] = 0.0; + myshear[0] = 0.0; + myshear[1] = 0.0; + myshear[2] = 0.0; } else 
{ r = sqrt(rsq); @@ -186,7 +181,6 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int if (mask[i] & freeze_group_bit) meff = rmass[j]; if (mask[j] & freeze_group_bit) meff = rmass[i]; } else { - itype = type[i]; jtype = type[j]; meff = mass[itype]*mass[jtype] / (mass[itype]+mass[jtype]); if (mask[i] & freeze_group_bit) meff = mass[jtype]; @@ -207,31 +201,31 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int // shear history effects touch[jj] = 1; - shear = &allshear[3*jj]; + memcpy(myshear,allshear + 3*jj, 3*sizeof(double)); if (SHEARUPDATE) { - shear[0] += vtr1*dt; - shear[1] += vtr2*dt; - shear[2] += vtr3*dt; + myshear[0] += vtr1*dt; + myshear[1] += vtr2*dt; + myshear[2] += vtr3*dt; } - shrmag = sqrt(shear[0]*shear[0] + shear[1]*shear[1] + - shear[2]*shear[2]); + shrmag = sqrt(myshear[0]*myshear[0] + myshear[1]*myshear[1] + + myshear[2]*myshear[2]); // rotate shear displacements - rsht = shear[0]*delx + shear[1]*dely + shear[2]*delz; + rsht = myshear[0]*delx + myshear[1]*dely + myshear[2]*delz; rsht *= rsqinv; if (SHEARUPDATE) { - shear[0] -= rsht*delx; - shear[1] -= rsht*dely; - shear[2] -= rsht*delz; + myshear[0] -= rsht*delx; + myshear[1] -= rsht*dely; + myshear[2] -= rsht*delz; } // tangential forces = shear + tangential velocity damping - fs1 = - (kt*shear[0] + meff*gammat*vtr1); - fs2 = - (kt*shear[1] + meff*gammat*vtr2); - fs3 = - (kt*shear[2] + meff*gammat*vtr3); + fs1 = - (kt*myshear[0] + meff*gammat*vtr1); + fs2 = - (kt*myshear[1] + meff*gammat*vtr2); + fs3 = - (kt*myshear[2] + meff*gammat*vtr3); // rescale frictional displacements and forces if needed @@ -242,9 +236,9 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int if (shrmag != 0.0) { const double fnfs = fn/fs; const double mgkt = meff*gammat/kt; - shear[0] = fnfs * (shear[0] + mgkt*vtr1) - mgkt*vtr1; - shear[1] = fnfs * (shear[1] + mgkt*vtr2) - mgkt*vtr2; - shear[2] = fnfs * (shear[2] + mgkt*vtr3) - 
mgkt*vtr3; + myshear[0] = fnfs * (myshear[0] + mgkt*vtr1) - mgkt*vtr1; + myshear[1] = fnfs * (myshear[1] + mgkt*vtr2) - mgkt*vtr2; + myshear[2] = fnfs * (myshear[2] + mgkt*vtr3) - mgkt*vtr3; fs1 *= fnfs; fs2 *= fnfs; fs3 *= fnfs; @@ -277,9 +271,10 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int } if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,/* newton_pair */ 0, - 0.0,0.0,fx,fy,fz,delx,dely,delz,tid); + 0.0,0.0,fx,fy,fz,delx,dely,delz,thr); } + memcpy(allshear + 3*jj, myshear, 3*sizeof(double)); } f[i][0] += fxtmp; f[i][1] += fytmp; diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.h b/src/USER-OMP/pair_gran_hooke_history_omp.h index 33325025fc..7588469e74 100644 --- a/src/USER-OMP/pair_gran_hooke_history_omp.h +++ b/src/USER-OMP/pair_gran_hooke_history_omp.h @@ -39,7 +39,7 @@ class PairGranHookeHistoryOMP : public PairGranHookeHistory, public ThrOMP { private: template <int EVFLAG, int SHEARUPDATE> - void eval(double **f, double **torque, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_gran_hooke_omp.cpp b/src/USER-OMP/pair_gran_hooke_omp.cpp index d6991fa453..fda9295b70 100644 --- a/src/USER-OMP/pair_gran_hooke_omp.cpp +++ b/src/USER-OMP/pair_gran_hooke_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairGranHookeOMP::PairGranHookeOMP(LAMMPS *lmp) : - PairGranHooke(lmp), ThrOMP(lmp, PAIR) + PairGranHooke(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairGranHookeOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,33 +43,28 @@ void PairGranHookeOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) 
shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, **torque; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - torque = atom->torque + tid*nall; + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) - if (force->newton_pair) eval<1,1>(f, torque, ifrom, ito, tid); - else eval<1,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,1>(ifrom, ito, thr); + else eval<1,0>(ifrom, ito, thr); else - if (force->newton_pair) eval<0,1>(f, torque, ifrom, ito, tid); - else eval<0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<0,1>(ifrom, ito, thr); + else eval<0,0>(ifrom, ito, thr); - // reduce per thread forces and torque into global arrays. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); - data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); } template <int EVFLAG, int NEWTON_PAIR> -void PairGranHookeOMP::eval(double **f, double **torque, int iifrom, int iito, int tid) +void PairGranHookeOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz; @@ -82,15 +76,17 @@ void PairGranHookeOMP::eval(double **f, double **torque, int iifrom, int iito, i double fn,fs,ft,fs1,fs2,fs3; int *ilist,*jlist,*numneigh,**firstneigh; - double **x = atom->x; - double **v = atom->v; - double **omega = atom->omega; - double *radius = atom->radius; - double *rmass = atom->rmass; - double *mass = atom->mass; - int *type = atom->type; - int *mask = atom->mask; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + const double * const * const v = atom->v; + const double * const * const omega = atom->omega; + const double * const radius = atom->radius; + const double * const rmass = atom->rmass; + const double * const mass = atom->mass; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const int * const type = atom->type; + const int * const mask = atom->mask; + const int nlocal = atom->nlocal; double fxtmp,fytmp,fztmp; double t1tmp,t2tmp,t3tmp; @@ -216,7 +212,7 @@ void PairGranHookeOMP::eval(double **f, double **torque, int iifrom, int iito, i } if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, - 0.0,0.0,fx,fy,fz,delx,dely,delz,tid); + 0.0,0.0,fx,fy,fz,delx,dely,delz,thr); } } diff --git a/src/USER-OMP/pair_gran_hooke_omp.h b/src/USER-OMP/pair_gran_hooke_omp.h index f2b093778c..b275992bfa 100644 --- a/src/USER-OMP/pair_gran_hooke_omp.h +++ b/src/USER-OMP/pair_gran_hooke_omp.h @@ -39,7 +39,7 @@ class PairGranHookeOMP : public PairGranHooke, public ThrOMP { private: template <int EVFLAG, int NEWTON_PAIR> - void eval(double **f, double **torque, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git 
a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp index 012fd596b3..5da3f2bdfa 100644 --- a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp +++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp @@ -31,7 +31,7 @@ using namespace MathConst; /* ---------------------------------------------------------------------- */ PairHbondDreidingLJOMP::PairHbondDreidingLJOMP(LAMMPS *lmp) : - PairHbondDreidingLJ(lmp), ThrOMP(lmp, PAIR) + PairHbondDreidingLJ(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; hbcount_thr = hbeng_thr = NULL; @@ -54,7 +54,6 @@ void PairHbondDreidingLJOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -72,35 +71,31 @@ void PairHbondDreidingLJOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); - // reduce per thread hbond data if (eflag_global) { pvector[0] = 0.0; @@ -113,25 +108,26 @@ void PairHbondDreidingLJOMP::compute(int eflag, int vflag) } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairHbondDreidingLJOMP::eval(double **f, int iifrom, int iito, int tid) +void PairHbondDreidingLJOMP::eval(int iifrom, int iito, ThrData * const thr) { - int i,j,k,m,ii,jj,kk,jnum,knum,itype,jtype,ktype; + int i,j,k,m,ii,jj,kk,jnum,itype,jtype,ktype; double xtmp,ytmp,ztmp,delx,dely,delz,rsq,rsq1,rsq2,r1,r2; double factor_hb,force_angle,force_kernel,evdwl,eng_lj; double c,s,a,b,ac,a11,a12,a22,vx1,vx2,vy1,vy2,vz1,vz2; double fi[3],fj[3],delr1[3],delr2[3]; double r2inv,r10inv; double switch1,switch2; - int *ilist,*jlist,*klist,*numneigh,**firstneigh; + int *ilist,*jlist,*numneigh,**firstneigh; Param *pm; evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int **special = atom->special; - int **nspecial = atom->nspecial; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const double * const special_lj = force->special_lj; + const int * const * const nspecial = atom->nspecial; + const int * const * const special = atom->special; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -152,8 +148,8 @@ void PairHbondDreidingLJOMP::eval(double **f, int iifrom, int iito, int tid) itype = type[i]; if (!donor[itype]) continue; - klist = special[i]; - knum = nspecial[i][0]; + const int * const klist = special[i]; + const int knum = nspecial[i][0]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; @@ -270,7 +266,7 @@ void PairHbondDreidingLJOMP::eval(double **f, int 
iifrom, int iito, int tid) // KIJ instead of IJK b/c delr1/delr2 are both with respect to k - if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,tid); + if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,thr); if (EFLAG) { hbcount++; hbeng += evdwl; @@ -283,6 +279,7 @@ void PairHbondDreidingLJOMP::eval(double **f, int iifrom, int iito, int tid) f[i][1] += fytmp; f[i][2] += fztmp; } + const int tid = thr->get_tid(); hbcount_thr[tid] = static_cast<double>(hbcount); hbeng_thr[tid] = hbeng; } diff --git a/src/USER-OMP/pair_hbond_dreiding_lj_omp.h b/src/USER-OMP/pair_hbond_dreiding_lj_omp.h index 1aef78490c..9373916849 100644 --- a/src/USER-OMP/pair_hbond_dreiding_lj_omp.h +++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.h @@ -43,7 +43,7 @@ class PairHbondDreidingLJOMP : public PairHbondDreidingLJ, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp index b6c966f8c7..bce4efdd3a 100644 --- a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp +++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp @@ -31,7 +31,7 @@ using namespace MathConst; /* ---------------------------------------------------------------------- */ PairHbondDreidingMorseOMP::PairHbondDreidingMorseOMP(LAMMPS *lmp) : - PairHbondDreidingMorse(lmp), ThrOMP(lmp, PAIR) + PairHbondDreidingMorse(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; hbcount_thr = hbeng_thr = NULL; @@ -54,7 +54,6 @@ void PairHbondDreidingMorseOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -72,35 +71,31 @@ void PairHbondDreidingMorseOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp 
parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); - // reduce per thread hbond data if (eflag_global) { pvector[0] = 0.0; @@ -113,24 +108,25 @@ void PairHbondDreidingMorseOMP::compute(int eflag, int vflag) } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid) +void PairHbondDreidingMorseOMP::eval(int iifrom, int iito, ThrData * const thr) { - int i,j,k,m,ii,jj,kk,jnum,knum,itype,jtype,ktype; + int i,j,k,m,ii,jj,kk,jnum,itype,jtype,ktype; double xtmp,ytmp,ztmp,delx,dely,delz,rsq,rsq1,rsq2,r1,r2; double factor_hb,force_angle,force_kernel,evdwl; double c,s,a,b,ac,a11,a12,a22,vx1,vx2,vy1,vy2,vz1,vz2; double fi[3],fj[3],delr1[3],delr2[3]; double r,dr,dexp,eng_morse,switch1,switch2; - int *ilist,*jlist,*klist,*numneigh,**firstneigh; + int *ilist,*jlist,*numneigh,**firstneigh; Param *pm; evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int **special = atom->special; - int **nspecial = atom->nspecial; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const double * const special_lj = force->special_lj; + const int * const * const nspecial = atom->nspecial; + const int * const * const special = atom->special; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -151,8 +147,8 @@ void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid) itype = type[i]; if (!donor[itype]) continue; - klist = special[i]; - knum = nspecial[i][0]; + const int * const klist = special[i]; + const int knum = nspecial[i][0]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; @@ -268,7 +264,7 @@ void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid) // KIJ instead of IJK b/c delr1/delr2 are both with respect to k - if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,tid); + if (EVFLAG) 
ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,thr); if (EFLAG) { hbcount++; hbeng += evdwl; @@ -281,6 +277,7 @@ void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid) f[i][1] += fytmp; f[i][2] += fztmp; } + const int tid = thr->get_tid(); hbcount_thr[tid] = static_cast<double>(hbcount); hbeng_thr[tid] = hbeng; } diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.h b/src/USER-OMP/pair_hbond_dreiding_morse_omp.h index 2a13c618c6..d2edd7281b 100644 --- a/src/USER-OMP/pair_hbond_dreiding_morse_omp.h +++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.h @@ -43,7 +43,7 @@ class PairHbondDreidingMorseOMP : public PairHbondDreidingMorse, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj96_cut_omp.cpp b/src/USER-OMP/pair_lj96_cut_omp.cpp index f0998363e1..68733c1093 100644 --- a/src/USER-OMP/pair_lj96_cut_omp.cpp +++ b/src/USER-OMP/pair_lj96_cut_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJ96CutOMP::PairLJ96CutOMP(LAMMPS *lmp) : - PairLJ96Cut(lmp), ThrOMP(lmp, PAIR) + PairLJ96Cut(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJ96CutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairLJ96CutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, 
nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJ96CutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJ96CutOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -84,10 +79,11 @@ void PairLJ96CutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -141,8 +137,8 @@ void PairLJ96CutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, 
+ evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj96_cut_omp.h b/src/USER-OMP/pair_lj96_cut_omp.h index 333212303d..a8040320c8 100644 --- a/src/USER-OMP/pair_lj96_cut_omp.h +++ b/src/USER-OMP/pair_lj96_cut_omp.h @@ -39,7 +39,7 @@ class PairLJ96CutOMP : public PairLJ96Cut, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp index 32ad05acda..edfbe1f527 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCharmmCoulCharmmImplicitOMP::PairLJCharmmCoulCharmmImplicitOMP(LAMMPS *lmp) : - PairLJCharmmCoulCharmmImplicit(lmp), ThrOMP(lmp, PAIR) + PairLJCharmmCoulCharmmImplicit(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJCharmmCoulCharmmImplicitOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,64 +43,60 @@ void PairLJCharmmCoulCharmmImplicitOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if 
(force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCharmmCoulCharmmImplicitOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCharmmCoulCharmmImplicitOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; double philj,switch1,switch2; - double invdenom_coul,invdenom_lj; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double 
fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; - invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0; - invdenom_lj = (denom_lj != 0.0) ? 1.0/denom_lj : 0.0; + const double invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0; + const double invdenom_lj = (denom_lj != 0.0) ? 1.0/denom_lj : 0.0; // loop over neighbors of my atoms @@ -193,7 +188,7 @@ void PairLJCharmmCoulCharmmImplicitOMP::eval(double **f, int iifrom, int iito, i } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h index ba016d7d3d..dff01ce499 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h @@ -39,7 +39,7 @@ class PairLJCharmmCoulCharmmImplicitOMP : public PairLJCharmmCoulCharmmImplicit, private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp index 6dac7a17f6..efdcc995da 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCharmmCoulCharmmOMP::PairLJCharmmCoulCharmmOMP(LAMMPS *lmp) : - PairLJCharmmCoulCharmm(lmp), ThrOMP(lmp, PAIR) + PairLJCharmmCoulCharmm(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJCharmmCoulCharmmOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + 
atom->nghost; @@ -44,64 +43,60 @@ void PairLJCharmmCoulCharmmOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCharmmCoulCharmmOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCharmmCoulCharmmOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; double philj,switch1,switch2; - double invdenom_coul,invdenom_lj; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; - invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0; - invdenom_lj = (denom_lj != 0.0) ? 1.0/denom_lj : 0.0; + const double invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0; + const double invdenom_lj = (denom_lj != 0.0) ? 
1.0/denom_lj : 0.0; // loop over neighbors of my atoms @@ -193,7 +188,7 @@ void PairLJCharmmCoulCharmmOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h index f2889b05fe..0eda030ebd 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h @@ -39,7 +39,7 @@ class PairLJCharmmCoulCharmmOMP : public PairLJCharmmCoulCharmm, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp index c99f27f2e1..f9f32ea119 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp +++ b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp @@ -33,7 +33,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCharmmCoulLongOMP::PairLJCharmmCoulLongOMP(LAMMPS *lmp) : - PairLJCharmmCoulLong(lmp), ThrOMP(lmp, PAIR) + PairLJCharmmCoulLong(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -44,7 +44,6 @@ void PairLJCharmmCoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -52,40 +51,36 @@ void PairLJCharmmCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, 
nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCharmmCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCharmmCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype,itable; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -97,13 +92,14 @@ void PairLJCharmmCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * 
const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -214,7 +210,7 @@ void PairLJCharmmCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_omp.h b/src/USER-OMP/pair_lj_charmm_coul_long_omp.h index b14e4c1fe4..91b9c01c1a 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_long_omp.h +++ b/src/USER-OMP/pair_lj_charmm_coul_long_omp.h @@ -39,7 +39,7 @@ class PairLJCharmmCoulLongOMP : public PairLJCharmmCoulLong, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp index 0321882793..e54c348e64 100644 --- a/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJClass2CoulCutOMP::PairLJClass2CoulCutOMP(LAMMPS *lmp) : - PairLJClass2CoulCut(lmp), ThrOMP(lmp, PAIR) + PairLJClass2CoulCut(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJClass2CoulCutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,36 @@ void PairLJClass2CoulCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = 
loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJClass2CoulCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJClass2CoulCutOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -87,13 +82,14 @@ void PairLJClass2CoulCutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -163,9 +159,9 @@ void PairLJClass2CoulCutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } else evdwl = 0.0; } - + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_class2_coul_cut_omp.h b/src/USER-OMP/pair_lj_class2_coul_cut_omp.h index 5fe4895691..b22a29aa18 100644 --- a/src/USER-OMP/pair_lj_class2_coul_cut_omp.h +++ b/src/USER-OMP/pair_lj_class2_coul_cut_omp.h @@ -39,7 +39,7 @@ class PairLJClass2CoulCutOMP : public PairLJClass2CoulCut, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } 
diff --git a/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp index 84d26ceb14..20ad947d23 100644 --- a/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp +++ b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp @@ -33,7 +33,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJClass2CoulLongOMP::PairLJClass2CoulLongOMP(LAMMPS *lmp) : - PairLJClass2CoulLong(lmp), ThrOMP(lmp, PAIR) + PairLJClass2CoulLong(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -44,7 +44,6 @@ void PairLJClass2CoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -52,40 +51,36 @@ void PairLJClass2CoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJClass2CoulLongOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJClass2CoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -95,13 +90,14 @@ void PairLJClass2CoulLongOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -181,7 +177,7 @@ void PairLJClass2CoulLongOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_class2_coul_long_omp.h b/src/USER-OMP/pair_lj_class2_coul_long_omp.h index da4ac3680f..b32799bf84 100644 --- a/src/USER-OMP/pair_lj_class2_coul_long_omp.h +++ b/src/USER-OMP/pair_lj_class2_coul_long_omp.h @@ -39,7 +39,7 @@ class PairLJClass2CoulLongOMP : public PairLJClass2CoulLong, public ThrOMP { private: template 
<int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_class2_omp.cpp b/src/USER-OMP/pair_lj_class2_omp.cpp index 4f5d2550fc..cff80d3f1d 100644 --- a/src/USER-OMP/pair_lj_class2_omp.cpp +++ b/src/USER-OMP/pair_lj_class2_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJClass2OMP::PairLJClass2OMP(LAMMPS *lmp) : - PairLJClass2(lmp), ThrOMP(lmp, PAIR) + PairLJClass2(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJClass2OMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairLJClass2OMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force 
array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJClass2OMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJClass2OMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -84,10 +79,11 @@ void PairLJClass2OMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -141,8 +137,8 @@ void PairLJClass2OMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_class2_omp.h b/src/USER-OMP/pair_lj_class2_omp.h index cfe24bb714..317c7376c5 100644 --- a/src/USER-OMP/pair_lj_class2_omp.h +++ b/src/USER-OMP/pair_lj_class2_omp.h @@ -39,7 +39,7 @@ class PairLJClass2OMP : public PairLJClass2, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_coul_omp.cpp b/src/USER-OMP/pair_lj_coul_omp.cpp index 23e2a8d906..ae15087ba9 100644 --- a/src/USER-OMP/pair_lj_coul_omp.cpp +++ 
b/src/USER-OMP/pair_lj_coul_omp.cpp @@ -34,7 +34,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCoulOMP::PairLJCoulOMP(LAMMPS *lmp) : - PairLJCoul(lmp), ThrOMP(lmp, PAIR) + PairLJCoul(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -45,7 +45,6 @@ void PairLJCoulOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -53,53 +52,50 @@ void PairLJCoulOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCoulOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCoulOMP::eval(int iifrom, int iito, ThrData * const thr) { double evdwl,ecoul,fpair; evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; - double *x0 = x[0]; + const double *x0 = x[0]; double *f0 = f[0], *fi = f0; int *ilist = list->ilist; @@ -127,7 +123,7 @@ void PairLJCoulOMP::eval(double **f, int iifrom, int iito, int tid) ni = sbmask(j); j &= NEIGHMASK; - { register double *xj = x0+(j+(j<<1)); + { register const double *xj = x0+(j+(j<<1)); d[0] = xi[0] - xj[0]; // pair vector d[1] = xi[1] - xj[1]; d[2] = xi[2] - xj[2]; } @@ -218,7 +214,7 @@ void PairLJCoulOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,d[0],d[1],d[2],tid); + evdwl,ecoul,fpair,d[0],d[1],d[2],thr); } } } diff --git a/src/USER-OMP/pair_lj_coul_omp.h b/src/USER-OMP/pair_lj_coul_omp.h index 619e609ba8..e2259e16a0 100644 --- a/src/USER-OMP/pair_lj_coul_omp.h +++ b/src/USER-OMP/pair_lj_coul_omp.h @@ -39,7 +39,7 @@ class PairLJCoulOMP : public PairLJCoul, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int 
ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_cubic_omp.cpp b/src/USER-OMP/pair_lj_cubic_omp.cpp index 4f806bd71f..09e44a9107 100644 --- a/src/USER-OMP/pair_lj_cubic_omp.cpp +++ b/src/USER-OMP/pair_lj_cubic_omp.cpp @@ -26,7 +26,7 @@ using namespace PairLJCubicConstants; /* ---------------------------------------------------------------------- */ PairLJCubicOMP::PairLJCubicOMP(LAMMPS *lmp) : - PairLJCubic(lmp), ThrOMP(lmp, PAIR) + PairLJCubic(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -37,7 +37,6 @@ void PairLJCubicOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -45,38 +44,34 @@ void PairLJCubicOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCubicOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCubicOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -86,10 +81,11 @@ void PairLJCubicOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -152,8 +148,8 @@ void PairLJCubicOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_cubic_omp.h b/src/USER-OMP/pair_lj_cubic_omp.h index 559a6125ab..a6ed7d2b97 100644 --- a/src/USER-OMP/pair_lj_cubic_omp.h +++ b/src/USER-OMP/pair_lj_cubic_omp.h @@ -39,7 +39,7 @@ class PairLJCubicOMP : public PairLJCubic, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp index be98ec38fc..46114ce613 100644 --- a/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp +++ 
b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCutCoulCutOMP::PairLJCutCoulCutOMP(LAMMPS *lmp) : - PairLJCutCoulCut(lmp), ThrOMP(lmp, PAIR) + PairLJCutCoulCut(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJCutCoulCutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,36 @@ void PairLJCutCoulCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCutCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCutCoulCutOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -86,13 +81,14 @@ void PairLJCutCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -159,11 +155,11 @@ void PairLJCutCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - offset[itype][jtype]; evdwl *= factor_lj; - } - } else evdwl = 0.0; + } else evdwl = 0.0; + } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_omp.h b/src/USER-OMP/pair_lj_cut_coul_cut_omp.h index c8c34e2591..3d4be420e7 100644 --- a/src/USER-OMP/pair_lj_cut_coul_cut_omp.h +++ b/src/USER-OMP/pair_lj_cut_coul_cut_omp.h @@ -39,7 +39,7 @@ class PairLJCutCoulCutOMP : public PairLJCutCoulCut, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, 
int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp index 13a4a1906f..9d96f31dba 100644 --- a/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCutCoulDebyeOMP::PairLJCutCoulDebyeOMP(LAMMPS *lmp) : - PairLJCutCoulDebye(lmp), ThrOMP(lmp, PAIR) + PairLJCutCoulDebye(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJCutCoulDebyeOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,36 @@ void PairLJCutCoulDebyeOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCutCoulDebyeOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -87,13 +82,14 @@ void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -129,7 +125,6 @@ void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid) if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - if (rsq < cut_coulsq[itype][jtype]) { r = sqrt(rsq); rinv = 1.0/r; @@ -165,8 +160,9 @@ void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } else evdwl = 0.0; } + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_cut_coul_debye_omp.h b/src/USER-OMP/pair_lj_cut_coul_debye_omp.h 
index 00cf540be2..e2205cb7ce 100644 --- a/src/USER-OMP/pair_lj_cut_coul_debye_omp.h +++ b/src/USER-OMP/pair_lj_cut_coul_debye_omp.h @@ -39,7 +39,7 @@ class PairLJCutCoulDebyeOMP : public PairLJCutCoulDebye, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp index 1d8f977c96..79976bf8a8 100644 --- a/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp @@ -33,7 +33,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCutCoulLongOMP::PairLJCutCoulLongOMP(LAMMPS *lmp) : - PairLJCutCoulLong(lmp), ThrOMP(lmp, PAIR) + PairLJCutCoulLong(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -44,7 +44,6 @@ void PairLJCutCoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -52,40 +51,36 @@ void PairLJCutCoulLongOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, 
ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCutCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCutCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype,itable; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -96,13 +91,14 @@ void PairLJCutCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -198,9 +194,9 @@ void PairLJCutCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } else evdwl = 0.0; } - + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff 
--git a/src/USER-OMP/pair_lj_cut_coul_long_omp.h b/src/USER-OMP/pair_lj_cut_coul_long_omp.h index ac408ba886..a907959ae3 100644 --- a/src/USER-OMP/pair_lj_cut_coul_long_omp.h +++ b/src/USER-OMP/pair_lj_cut_coul_long_omp.h @@ -39,7 +39,7 @@ class PairLJCutCoulLongOMP : public PairLJCutCoulLong, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp index 6ada944c53..78f35709a2 100644 --- a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp @@ -36,7 +36,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCutCoulLongTIP4POMP::PairLJCutCoulLongTIP4POMP(LAMMPS *lmp) : - PairLJCutCoulLongTIP4P(lmp), ThrOMP(lmp, PAIR) + PairLJCutCoulLongTIP4P(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; @@ -61,7 +61,6 @@ void PairLJCutCoulLongTIP4POMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nlocal = atom->nlocal; @@ -76,8 +75,8 @@ void PairLJCutCoulLongTIP4POMP::compute(int eflag, int vflag) } // cache corrected M positions in mpos[] - double **x = atom->x; - int *type = atom->type; + const double * const * const x = atom->x; + const int * const type = atom->type; for (int i = 0; i < nlocal; i++) { if (type[i] == typeO) { find_M(i,h1idx[i],h2idx[i],mpos[i]); @@ -101,39 +100,35 @@ void PairLJCutCoulLongTIP4POMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, 
inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (vflag) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (vflag) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (vflag) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (vflag) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - eval<0,0,0>(f, ifrom, ito, tid); + eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int VFLAG> -void PairLJCutCoulLongTIP4POMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCutCoulLongTIP4POMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype,itable; int n,vlist[6]; @@ -151,13 +146,14 @@ void PairLJCutCoulLongTIP4POMP::eval(double **f, int iifrom, int iito, int tid) evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -216,7 +212,7 @@ void 
PairLJCutCoulLongTIP4POMP::eval(double **f, int iifrom, int iito, int tid) } else evdwl = 0.0; if (EVFLAG) ev_tally_thr(this,i,j,nlocal, /* newton_pair = */ 1, - evdwl,0.0,forcelj,delx,dely,delz,tid); + evdwl,0.0,forcelj,delx,dely,delz,thr); } // adjust rsq and delxyz for off-site O charge(s) @@ -423,7 +419,7 @@ void PairLJCutCoulLongTIP4POMP::eval(double **f, int iifrom, int iito, int tid) if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; } else ecoul = 0.0; - if (EVFLAG) ev_tally_list_thr(this,n,vlist,ecoul,v,tid); + if (EVFLAG) ev_tally_list_thr(this,n,vlist,ecoul,v,thr); } } } diff --git a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h index 093fc0216b..ff49bdcedb 100644 --- a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h +++ b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h @@ -39,7 +39,6 @@ class PairLJCutCoulLongTIP4POMP : public PairLJCutCoulLongTIP4P, public ThrOMP { virtual double memory_usage(); protected: - // this is to cache m-shift corrected positions. 
int maxmpos; // size of the following arrays int *h1idx, *h2idx; // local index of hydrogen atoms @@ -48,7 +47,7 @@ class PairLJCutCoulLongTIP4POMP : public PairLJCutCoulLongTIP4P, public ThrOMP { private: template <int EVFLAG, int EFLAG, int VFLAG> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_omp.cpp index 3d82149fec..4932a784bb 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) + PairLJCut(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJCutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairLJCutOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - 
else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCutOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -84,10 +79,11 @@ void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -139,8 +135,8 @@ void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_cut_omp.h b/src/USER-OMP/pair_lj_cut_omp.h index 56f9f9b8a5..f97996e480 100644 --- a/src/USER-OMP/pair_lj_cut_omp.h +++ b/src/USER-OMP/pair_lj_cut_omp.h @@ -39,7 +39,7 @@ class PairLJCutOMP : public PairLJCut, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git 
a/src/USER-OMP/pair_lj_expand_omp.cpp b/src/USER-OMP/pair_lj_expand_omp.cpp index 7b06503ee4..4f93d3bd42 100644 --- a/src/USER-OMP/pair_lj_expand_omp.cpp +++ b/src/USER-OMP/pair_lj_expand_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJExpandOMP::PairLJExpandOMP(LAMMPS *lmp) : - PairLJExpand(lmp), ThrOMP(lmp, PAIR) + PairLJExpand(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJExpandOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairLJExpandOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJExpandOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJExpandOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -85,10 +80,11 @@ void PairLJExpandOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -143,8 +139,8 @@ void PairLJExpandOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_expand_omp.h b/src/USER-OMP/pair_lj_expand_omp.h index 29488deae8..9ff8d3080a 100644 --- a/src/USER-OMP/pair_lj_expand_omp.h +++ b/src/USER-OMP/pair_lj_expand_omp.h @@ -39,7 +39,7 @@ class PairLJExpandOMP : public PairLJExpand, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp index 2e97fa1b5e..ca8875c7f8 100644 --- 
a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp +++ b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJGromacsCoulGromacsOMP::PairLJGromacsCoulGromacsOMP(LAMMPS *lmp) : - PairLJGromacsCoulGromacs(lmp), ThrOMP(lmp, PAIR) + PairLJGromacsCoulGromacs(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJGromacsCoulGromacsOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,40 +43,36 @@ void PairLJGromacsCoulGromacsOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. 
- data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJGromacsCoulGromacsOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJGromacsCoulGromacsOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; @@ -87,13 +82,14 @@ void PairLJGromacsCoulGromacsOMP::eval(double **f, int iifrom, int iito, int tid evdwl = ecoul = 0.0; - double **x = atom->x; - double *q = atom->q; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -190,7 +186,7 @@ void PairLJGromacsCoulGromacsOMP::eval(double **f, int iifrom, int iito, int tid } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h index d789bd6797..ee506c2c4a 100644 --- a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h +++ b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h @@ -39,7 +39,7 @@ class PairLJGromacsCoulGromacsOMP : public 
PairLJGromacsCoulGromacs, public ThrO private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_gromacs_omp.cpp b/src/USER-OMP/pair_lj_gromacs_omp.cpp index f1c7d2faf9..abdc4c5ccf 100644 --- a/src/USER-OMP/pair_lj_gromacs_omp.cpp +++ b/src/USER-OMP/pair_lj_gromacs_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJGromacsOMP::PairLJGromacsOMP(LAMMPS *lmp) : - PairLJGromacs(lmp), ThrOMP(lmp, PAIR) + PairLJGromacs(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJGromacsOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairLJGromacsOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else 
eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJGromacsOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJGromacsOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -85,10 +80,11 @@ void PairLJGromacsOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -151,8 +147,8 @@ void PairLJGromacsOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_gromacs_omp.h b/src/USER-OMP/pair_lj_gromacs_omp.h index d192a414ef..8e0f4bd281 100644 --- a/src/USER-OMP/pair_lj_gromacs_omp.h +++ b/src/USER-OMP/pair_lj_gromacs_omp.h @@ -39,7 +39,7 @@ class PairLJGromacsOMP : public PairLJGromacs, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_sf_omp.cpp b/src/USER-OMP/pair_lj_sf_omp.cpp index 
55ee908e47..47cc23bf91 100644 --- a/src/USER-OMP/pair_lj_sf_omp.cpp +++ b/src/USER-OMP/pair_lj_sf_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJShiftedForceOMP::PairLJShiftedForceOMP(LAMMPS *lmp) : - PairLJShiftedForce(lmp), ThrOMP(lmp, PAIR) + PairLJShiftedForce(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJShiftedForceOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairLJShiftedForceOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJShiftedForceOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJShiftedForceOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -84,10 +79,11 @@ void PairLJShiftedForceOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -142,8 +138,8 @@ void PairLJShiftedForceOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_sf_omp.h b/src/USER-OMP/pair_lj_sf_omp.h index 6fba43fb8f..c73c8f746b 100644 --- a/src/USER-OMP/pair_lj_sf_omp.h +++ b/src/USER-OMP/pair_lj_sf_omp.h @@ -39,7 +39,7 @@ class PairLJShiftedForceOMP : public PairLJShiftedForce, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_lj_smooth_omp.cpp b/src/USER-OMP/pair_lj_smooth_omp.cpp index 1ad88044a6..4bf9ceb41c 100644 --- a/src/USER-OMP/pair_lj_smooth_omp.cpp +++ b/src/USER-OMP/pair_lj_smooth_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ 
PairLJSmoothOMP::PairLJSmoothOMP(LAMMPS *lmp) : - PairLJSmooth(lmp), ThrOMP(lmp, PAIR) + PairLJSmooth(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairLJSmoothOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairLJSmoothOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJSmoothOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJSmoothOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -85,10 +80,11 @@ void PairLJSmoothOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -155,8 +151,8 @@ void PairLJSmoothOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_lj_smooth_omp.h b/src/USER-OMP/pair_lj_smooth_omp.h index de27a4008d..eb6eb92dec 100644 --- a/src/USER-OMP/pair_lj_smooth_omp.h +++ b/src/USER-OMP/pair_lj_smooth_omp.h @@ -39,7 +39,7 @@ class PairLJSmoothOMP : public PairLJSmooth, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_morse_omp.cpp b/src/USER-OMP/pair_morse_omp.cpp index a53e35a977..f61fd4e383 100644 --- a/src/USER-OMP/pair_morse_omp.cpp +++ b/src/USER-OMP/pair_morse_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairMorseOMP::PairMorseOMP(LAMMPS *lmp) : - PairMorse(lmp), 
ThrOMP(lmp, PAIR) + PairMorse(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairMorseOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairMorseOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairMorseOMP::eval(double **f, int iifrom, int iito, int tid) +void PairMorseOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -84,10 +79,11 @@ void PairMorseOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -139,8 +135,8 @@ void PairMorseOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor_lj; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_morse_omp.h b/src/USER-OMP/pair_morse_omp.h index a966e6f11f..a20aad6716 100644 --- a/src/USER-OMP/pair_morse_omp.h +++ b/src/USER-OMP/pair_morse_omp.h @@ -39,7 +39,7 @@ class PairMorseOMP : public PairMorse, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_peri_lps_omp.cpp b/src/USER-OMP/pair_peri_lps_omp.cpp index 7cb1e83086..e052271e4f 100644 --- a/src/USER-OMP/pair_peri_lps_omp.cpp +++ b/src/USER-OMP/pair_peri_lps_omp.cpp @@ -26,15 +26,18 @@ #include "modify.h" #include "neighbor.h" #include "neigh_list.h" +#include "math_const.h" using namespace LAMMPS_NS; +using namespace MathConst; /* 
---------------------------------------------------------------------- */ PairPeriLPSOMP::PairPeriLPSOMP(LAMMPS *lmp) : - PairPeriLPS(lmp), ThrOMP(lmp, PAIR) + PairPeriLPS(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; + fix_name = "PERI_NEIGH_OMP"; } /* ---------------------------------------------------------------------- */ @@ -43,7 +46,6 @@ void PairPeriLPSOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; const int nall = atom->nlocal + atom->nghost; @@ -61,38 +63,34 @@ void PairPeriLPSOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) +void PairPeriLPSOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz; @@ -103,9 +101,10 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; double fxtmp,fytmp,fztmp; double *vfrac = atom->vfrac; @@ -151,7 +150,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; j &= NEIGHMASK; - + delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; @@ -182,7 +181,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) // of the bond-based theory used in PMB model double kshort = (15.0 * 18.0 * bulkmodulus[itype][itype]) / - (3.141592653589793 * cutsq[itype][jtype] * cutsq[itype][jtype]); + (MY_PI * cutsq[itype][jtype] * cutsq[itype][jtype]); rk = (kshort * vfrac[j]) * (dr / cut[itype][jtype]); if (r > 0.0) fpair = -(rk/r); @@ -199,7 +198,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) if (EFLAG) evdwl = 0.5*rk*dr; if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0, - fpair*vfrac[i],delx,dely,delz,tid); + fpair*vfrac[i],delx,dely,delz,thr); } } f[i][0] += fxtmp; @@ -214,7 +213,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) #if defined(_OPENMP) // each thread works on a fixed chunk of atoms. 
const int idelta = 1 + nlocal/comm->nthreads; - iifrom = tid*idelta; + iifrom = thr->get_tid()*idelta; iito = iifrom + idelta; if (iito > nlocal) iito = nlocal; @@ -234,7 +233,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) #endif { // communicate dilatation (theta) of each particle comm->forward_comm_pair(this); - // communicate wighted volume (wvolume) upon every reneighbor + // communicate weighted volume (wvolume) upon every reneighbor if (neighbor->ago == 0) comm->forward_comm_fix(modify->fix[ifix_peri]); } @@ -245,10 +244,8 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) if (EFLAG) { for (i = iifrom; i < iito; i++) { itype = type[i]; - if (eflag_global) - eng_vdwl_thr[tid] += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]); - if (eflag_atom) - eatom_thr[tid][i] += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]); + e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, + 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]), 0.0, thr); } } @@ -332,7 +329,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) omega_plus*(deviatoric_extension * deviatoric_extension) * vfrac[j] * vfrac_scale; if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,0.5*evdwl,0.0, - 0.5*fbond*vfrac[i],delx,dely,delz,tid); + 0.5*fbond*vfrac[i],delx,dely,delz,thr); // find stretch in bond I-J and break if necessary // use s0 from previous timestep diff --git a/src/USER-OMP/pair_peri_lps_omp.h b/src/USER-OMP/pair_peri_lps_omp.h index 2068830ca0..f234a41098 100644 --- a/src/USER-OMP/pair_peri_lps_omp.h +++ b/src/USER-OMP/pair_peri_lps_omp.h @@ -43,7 +43,7 @@ class PairPeriLPSOMP : public PairPeriLPS, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_peri_pmb_omp.cpp b/src/USER-OMP/pair_peri_pmb_omp.cpp index 4e46d142d9..96e991bab6 100644 --- 
a/src/USER-OMP/pair_peri_pmb_omp.cpp +++ b/src/USER-OMP/pair_peri_pmb_omp.cpp @@ -32,9 +32,10 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairPeriPMBOMP::PairPeriPMBOMP(LAMMPS *lmp) : - PairPeriPMB(lmp), ThrOMP(lmp, PAIR) + PairPeriPMB(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; + fix_name = "PERI_NEIGH_OMP"; } /* ---------------------------------------------------------------------- */ @@ -43,7 +44,6 @@ void PairPeriPMBOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -59,38 +59,34 @@ void PairPeriPMBOMP::compute(int eflag, int vflag) } #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid) +void PairPeriPMBOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz; @@ -101,9 +97,10 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; double fxtmp,fytmp,fztmp; double *vfrac = atom->vfrac; @@ -148,10 +145,11 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid) for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; j &= NEIGHMASK; - + delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; delx0 = xtmp0 - x0[j][0]; dely0 = ytmp0 - x0[j][1]; @@ -190,7 +188,7 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid) if (EFLAG) evdwl = 0.5*rk*dr; if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0, - fpair*vfrac[i],delx,dely,delz,tid); + fpair*vfrac[i],delx,dely,delz,thr); } } f[i][0] += fxtmp; @@ -205,7 +203,7 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid) #if defined(_OPENMP) // each thread works on a fixed chunk of atoms. 
const int idelta = 1 + nlocal/comm->nthreads; - iifrom = tid*idelta; + iifrom = thr->get_tid()*idelta; iito = iifrom + idelta; if (iito > nlocal) iito = nlocal; @@ -278,7 +276,7 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid) if (EFLAG) evdwl = 0.5*rk*dr; if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,0.5*evdwl,0.0, - 0.5*fbond*vfrac[i],delx,dely,delz,tid); + 0.5*fbond*vfrac[i],delx,dely,delz,thr); // find stretch in bond I-J and break if necessary // use s0 from previous timestep @@ -291,13 +289,14 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid) s0_new[i] = s00[itype][jtype] - (alpha[itype][jtype] * stretch); else s0_new[i] = MAX(s0_new[i],s00[itype][jtype] - (alpha[itype][jtype] * stretch)); + first = false; } } sync_threads(); - // store new s0 + // store new s0 (in parallel) for (i = iifrom; i < iito; i++) s0[i] = s0_new[i]; } diff --git a/src/USER-OMP/pair_peri_pmb_omp.h b/src/USER-OMP/pair_peri_pmb_omp.h index 9940e5ed15..8a7fc091d9 100644 --- a/src/USER-OMP/pair_peri_pmb_omp.h +++ b/src/USER-OMP/pair_peri_pmb_omp.h @@ -39,7 +39,7 @@ class PairPeriPMBOMP : public PairPeriPMB, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_resquared_omp.cpp b/src/USER-OMP/pair_resquared_omp.cpp index 4870553050..cef5aaefc5 100644 --- a/src/USER-OMP/pair_resquared_omp.cpp +++ b/src/USER-OMP/pair_resquared_omp.cpp @@ -27,7 +27,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairRESquaredOMP::PairRESquaredOMP(LAMMPS *lmp) : - PairRESquared(lmp), ThrOMP(lmp, PAIR) + PairRESquared(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -38,7 +38,6 @@ void PairRESquaredOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int 
nall = atom->nlocal + atom->nghost; @@ -46,40 +45,34 @@ void PairRESquaredOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f, **torque; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); - torque = atom->torque + tid*nall; + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid); - else eval<1,1,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid); - else eval<1,0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid); - else eval<0,0,0>(f, torque, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces and torques into global arrays. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); - data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int tid) +void PairRESquaredOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj; @@ -87,11 +80,12 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int int *ilist,*jlist,*numneigh,**firstneigh; RE2Vars wi,wj; - double **x = atom->x; - int *ellipsoid = atom->ellipsoid; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const tor = thr->get_torque(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp; @@ -105,6 +99,7 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int i = ilist[ii]; itype = type[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; // not a LJ sphere @@ -129,6 +124,8 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int // compute if less than cutoff if (rsq < cutsq[itype][jtype]) { + fforce[0] = fforce[1] = fforce[2] = 0.0; + switch (form[itype][jtype]) { case SPHERE_SPHERE: @@ -157,17 +154,17 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int case ELLIPSE_SPHERE: one_eng = resquared_lj(i,j,wi,r12,rsq,fforce,ttor,true); - tor[i][0] += ttor[0]*factor_lj; - tor[i][1] += ttor[1]*factor_lj; - tor[i][2] += ttor[2]*factor_lj; + t1tmp += ttor[0]*factor_lj; + t2tmp += ttor[1]*factor_lj; + t3tmp += ttor[2]*factor_lj; break; default: precompute_i(j,wj); one_eng = resquared_analytic(i,j,wi,wj,r12,rsq,fforce,ttor,rtor); - tor[i][0] += ttor[0]*factor_lj; - tor[i][1] += ttor[1]*factor_lj; - 
tor[i][2] += ttor[2]*factor_lj; + t1tmp += ttor[0]*factor_lj; + t2tmp += ttor[1]*factor_lj; + t3tmp += ttor[2]*factor_lj; if (NEWTON_PAIR || j < nlocal) { tor[j][0] += rtor[0]*factor_lj; tor[j][1] += rtor[1]*factor_lj; @@ -179,9 +176,9 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int fforce[0] *= factor_lj; fforce[1] *= factor_lj; fforce[2] *= factor_lj; - f[i][0] += fforce[0]; - f[i][1] += fforce[1]; - f[i][2] += fforce[2]; + fxtmp += fforce[0]; + fytmp += fforce[1]; + fztmp += fforce[2]; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= fforce[0]; @@ -193,9 +190,15 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, evdwl,0.0,fforce[0],fforce[1],fforce[2], - -r12[0],-r12[1],-r12[2],tid); + -r12[0],-r12[1],-r12[2],thr); } } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + tor[i][0] += t1tmp; + tor[i][1] += t2tmp; + tor[i][2] += t3tmp; } } diff --git a/src/USER-OMP/pair_resquared_omp.h b/src/USER-OMP/pair_resquared_omp.h index 2a50bb6dd0..53a6e2e28f 100644 --- a/src/USER-OMP/pair_resquared_omp.h +++ b/src/USER-OMP/pair_resquared_omp.h @@ -39,7 +39,7 @@ class PairRESquaredOMP : public PairRESquared, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, double **torque, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_soft_omp.cpp b/src/USER-OMP/pair_soft_omp.cpp index 9f9673a28b..cbc1c9f7fd 100644 --- a/src/USER-OMP/pair_soft_omp.cpp +++ b/src/USER-OMP/pair_soft_omp.cpp @@ -29,7 +29,7 @@ using namespace MathConst; /* ---------------------------------------------------------------------- */ PairSoftOMP::PairSoftOMP(LAMMPS *lmp) : - PairSoft(lmp), ThrOMP(lmp, PAIR) + PairSoft(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -40,7 +40,6 @@ void PairSoftOMP::compute(int eflag, int vflag) { if (eflag || vflag) { 
ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -48,38 +47,34 @@ void PairSoftOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairSoftOMP::eval(double **f, int iifrom, int iito, int tid) +void PairSoftOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -88,10 +83,11 @@ void PairSoftOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -141,8 +137,8 @@ void PairSoftOMP::eval(double **f, int iifrom, int iito, int tid) if (EFLAG) evdwl = factor_lj * prefactor[itype][jtype] * (1.0+cos(arg*r)); - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_soft_omp.h b/src/USER-OMP/pair_soft_omp.h index 840d874601..1698089521 100644 --- a/src/USER-OMP/pair_soft_omp.h +++ b/src/USER-OMP/pair_soft_omp.h @@ -39,7 +39,7 @@ class PairSoftOMP : public PairSoft, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_sw_omp.cpp b/src/USER-OMP/pair_sw_omp.cpp index 5d7f1a60d7..12aceed1d4 100644 --- a/src/USER-OMP/pair_sw_omp.cpp +++ b/src/USER-OMP/pair_sw_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairSWOMP::PairSWOMP(LAMMPS *lmp) : - PairSW(lmp), ThrOMP(lmp, PAIR) + 
PairSW(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairSWOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,33 +43,29 @@ void PairSWOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - eval<1,1>(f, ifrom, ito, tid); + eval<1,1>(ifrom, ito, thr); } else { - eval<1,0>(f, ifrom, ito, tid); + eval<1,0>(ifrom, ito, thr); } - } else eval<0,0>(f, ifrom, ito, tid); + } else eval<0,0>(ifrom, ito, thr); - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG> -void PairSWOMP::eval(double **f, int iifrom, int iito, int tid) +void PairSWOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,k,ii,jj,kk,jnum,jnumm1,itag,jtag; int itype,jtype,ktype,ijparam,ikparam,ijkparam; @@ -81,10 +76,11 @@ void PairSWOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *tag = atom->tag; - int *type = atom->type; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const tag = atom->tag; + const int * const type = atom->type; + const int nlocal = atom->nlocal; ilist = list->ilist; numneigh = list->numneigh; @@ -92,7 +88,7 @@ void PairSWOMP::eval(double **f, int iifrom, int iito, int tid) double fxtmp,fytmp,fztmp; - // loop over neighbors of my atoms + // loop over full neighbor list of my atoms for (ii = iifrom; ii < iito; ++ii) { @@ -144,7 +140,7 @@ void PairSWOMP::eval(double **f, int iifrom, int iito, int tid) f[j][2] -= delz*fpair; if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } jnumm1 = jnum - 1; @@ -189,7 +185,7 @@ void PairSWOMP::eval(double **f, int iifrom, int iito, int tid) f[k][1] += fk[1]; f[k][2] += fk[2]; - if (EVFLAG) ev_tally3_thr(this,i,j,k,evdwl,0.0,fj,fk,delr1,delr2,tid); + if (EVFLAG) ev_tally3_thr(this,i,j,k,evdwl,0.0,fj,fk,delr1,delr2,thr); } f[j][0] += fjxtmp; f[j][1] += fjytmp; diff --git a/src/USER-OMP/pair_sw_omp.h b/src/USER-OMP/pair_sw_omp.h index 40052d7d41..c4af86007a 100644 --- a/src/USER-OMP/pair_sw_omp.h +++ b/src/USER-OMP/pair_sw_omp.h @@ -39,7 +39,7 @@ class PairSWOMP : public PairSW, public ThrOMP { private: template <int EVFLAG, int EFLAG> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git 
a/src/USER-OMP/pair_table_omp.cpp b/src/USER-OMP/pair_table_omp.cpp index 6b14d4c981..e8d63e590d 100644 --- a/src/USER-OMP/pair_table_omp.cpp +++ b/src/USER-OMP/pair_table_omp.cpp @@ -26,7 +26,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairTableOMP::PairTableOMP(LAMMPS *lmp) : - PairTable(lmp), ThrOMP(lmp, PAIR) + PairTable(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -37,7 +37,6 @@ void PairTableOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -45,38 +44,34 @@ void PairTableOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairTableOMP::eval(double **f, int iifrom, int iito, int tid) +void PairTableOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype,itable; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -89,10 +84,11 @@ void PairTableOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -122,7 +118,7 @@ void PairTableOMP::eval(double **f, int iifrom, int iito, int tid) delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; - + if (rsq < cutsq[itype][jtype]) { tb = &tables[tabindex[itype][jtype]]; if (rsq < tb->innersq) @@ -181,7 +177,7 @@ void PairTableOMP::eval(double **f, int iifrom, int iito, int tid) } if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } } diff --git a/src/USER-OMP/pair_table_omp.h b/src/USER-OMP/pair_table_omp.h index 6fd1ce74a4..974149b9ac 100644 --- a/src/USER-OMP/pair_table_omp.h +++ b/src/USER-OMP/pair_table_omp.h @@ -39,7 +39,7 @@ class PairTableOMP : public PairTable, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_tersoff_omp.cpp b/src/USER-OMP/pair_tersoff_omp.cpp index f59a8488f7..fdbcd48292 100644 --- a/src/USER-OMP/pair_tersoff_omp.cpp +++ b/src/USER-OMP/pair_tersoff_omp.cpp @@ -25,7 +25,7 @@ using namespace 
LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairTersoffOMP::PairTersoffOMP(LAMMPS *lmp) : - PairTersoff(lmp), ThrOMP(lmp, PAIR) + PairTersoff(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairTersoffOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = vflag_atom = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,35 +43,31 @@ void PairTersoffOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (vflag_atom) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (vflag_atom) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (vflag_atom) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (vflag_atom) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } - } else eval<0,0,0>(f, ifrom, ito, tid); + } else eval<0,0,0>(ifrom, ito, thr); - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int VFLAG_ATOM> -void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid) +void PairTersoffOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,k,ii,jj,kk,jnum; int itag,jtag,itype,jtype,ktype,iparam_ij,iparam_ijk; @@ -84,10 +79,11 @@ void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *tag = atom->tag; - int *type = atom->type; - int nlocal = atom->nlocal; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const tag = atom->tag; + const int * const type = atom->type; + const int nlocal = atom->nlocal; ilist = list->ilist; numneigh = list->numneigh; @@ -147,7 +143,7 @@ void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid) f[j][2] -= delz*fpair; if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } // three-body interactions @@ -199,7 +195,7 @@ void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid) fjztmp -= delr1[2]*fpair; if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,evdwl,0.0, - -fpair,-delr1[0],-delr1[1],-delr1[2],tid); + -fpair,-delr1[0],-delr1[1],-delr1[2],thr); // attractive term via loop over k @@ -229,7 +225,7 @@ void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid) f[k][1] += fk[1]; f[k][2] += fk[2]; - if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,tid); + if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,thr); } f[j][0] += fjxtmp; f[j][1] += fjytmp; diff --git a/src/USER-OMP/pair_tersoff_omp.h b/src/USER-OMP/pair_tersoff_omp.h index 5e5dc066d2..97c20548af 100644 --- a/src/USER-OMP/pair_tersoff_omp.h +++ b/src/USER-OMP/pair_tersoff_omp.h @@ -34,7 +34,7 @@ class PairTersoffOMP : public PairTersoff, public ThrOMP { private: template <int EVFLAG, int EFLAG, int 
VFLAG_ATOM> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_yukawa_colloid_omp.cpp b/src/USER-OMP/pair_yukawa_colloid_omp.cpp index 710ad9df18..6caa13ee90 100644 --- a/src/USER-OMP/pair_yukawa_colloid_omp.cpp +++ b/src/USER-OMP/pair_yukawa_colloid_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairYukawaColloidOMP::PairYukawaColloidOMP(LAMMPS *lmp) : - PairYukawaColloid(lmp), ThrOMP(lmp, PAIR) + PairYukawaColloid(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairYukawaColloidOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairYukawaColloidOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread 
forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. - if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairYukawaColloidOMP::eval(double **f, int iifrom, int iito, int tid) +void PairYukawaColloidOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair,radi,radj; @@ -84,11 +79,12 @@ void PairYukawaColloidOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - double *radius = atom->radius; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const radius = atom->radius; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -144,7 +140,7 @@ void PairYukawaColloidOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor; } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_yukawa_colloid_omp.h b/src/USER-OMP/pair_yukawa_colloid_omp.h index 9483cd15c1..c424e9eff3 100644 --- a/src/USER-OMP/pair_yukawa_colloid_omp.h +++ b/src/USER-OMP/pair_yukawa_colloid_omp.h @@ -39,7 +39,7 @@ class PairYukawaColloidOMP : public PairYukawaColloid, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/pair_yukawa_omp.cpp b/src/USER-OMP/pair_yukawa_omp.cpp 
index 1380e2239c..210c7fcc1e 100644 --- a/src/USER-OMP/pair_yukawa_omp.cpp +++ b/src/USER-OMP/pair_yukawa_omp.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairYukawaOMP::PairYukawaOMP(LAMMPS *lmp) : - PairYukawa(lmp), ThrOMP(lmp, PAIR) + PairYukawa(lmp), ThrOMP(lmp, THR_PAIR) { respa_enable = 0; } @@ -36,7 +36,6 @@ void PairYukawaOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); - ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; @@ -44,38 +43,34 @@ void PairYukawaOMP::compute(int eflag, int vflag) const int inum = list->inum; #if defined(_OPENMP) -#pragma omp parallel default(shared) +#pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; - double **f; - f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { if (eflag) { - if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); - else eval<1,1,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); } else { - if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); - else eval<1,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); } } else { - if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); - else eval<0,0,0>(f, ifrom, ito, tid); + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); } - // reduce per thread forces into global force array. - data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region - - // reduce per thread energy and virial, if requested. 
- if (evflag) ev_reduce_thr(this); - if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairYukawaOMP::eval(double **f, int iifrom, int iito, int tid) +void PairYukawaOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; @@ -84,10 +79,11 @@ void PairYukawaOMP::eval(double **f, int iifrom, int iito, int tid) evdwl = 0.0; - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; @@ -141,8 +137,8 @@ void PairYukawaOMP::eval(double **f, int iifrom, int iito, int tid) evdwl *= factor; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); } } f[i][0] += fxtmp; diff --git a/src/USER-OMP/pair_yukawa_omp.h b/src/USER-OMP/pair_yukawa_omp.h index e363ac6d17..99abc569fa 100644 --- a/src/USER-OMP/pair_yukawa_omp.h +++ b/src/USER-OMP/pair_yukawa_omp.h @@ -39,7 +39,7 @@ class PairYukawaOMP : public PairYukawa, public ThrOMP { private: template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(double **f, int ifrom, int ito, int tid); + void eval(int ifrom, int ito, ThrData * const thr); }; } diff --git a/src/USER-OMP/thr_omp.cpp b/src/USER-OMP/thr_omp.cpp index 37ce1f198b..19537868e5 100644 --- a/src/USER-OMP/thr_omp.cpp +++ b/src/USER-OMP/thr_omp.cpp @@ -16,213 +16,481 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ -#include "thr_omp.h" - -#include "memory.h" - #include "atom.h" #include "comm.h" +#include 
"error.h" #include "force.h" +#include "memory.h" +#include "modify.h" +#include "neighbor.h" + +#include "thr_omp.h" #include "pair.h" +#include "bond.h" +#include "angle.h" #include "dihedral.h" - -#if defined(_OPENMP) -#include <omp.h> -#endif +#include "improper.h" +#include "kspace.h" #include "math_const.h" +#include <string.h> + using namespace LAMMPS_NS; using namespace MathConst; /* ---------------------------------------------------------------------- */ -ThrOMP::ThrOMP(LAMMPS *ptr, int style) : thr_style(style), lmp(ptr) +ThrOMP::ThrOMP(LAMMPS *ptr, int style) : lmp(ptr), fix(NULL), thr_style(style) { - // initialize fixed size per thread storage - eng_vdwl_thr = eng_coul_thr = eng_bond_thr = NULL; - virial_thr = NULL; - - lmp->memory->create(eng_vdwl_thr,lmp->comm->nthreads,"thr_omp:eng_vdwl_thr"); - lmp->memory->create(eng_coul_thr,lmp->comm->nthreads,"thr_omp:eng_coul_thr"); - lmp->memory->create(eng_bond_thr,lmp->comm->nthreads,"thr_omp:eng_bond_thr"); - lmp->memory->create(virial_thr,lmp->comm->nthreads,6,"thr_omp:virial_thr"); - - // variable size per thread, per atom storage - // the actually allocation happens via memory->grow() in ev_steup_thr() - maxeatom_thr = maxvatom_thr = 0; - evflag_global = evflag_atom = 0; - eatom_thr = NULL; - vatom_thr = NULL; + // register fix omp with this class + int ifix = lmp->modify->find_fix("package_omp"); + if (ifix < 0) + lmp->error->all(FLERR,"The 'package omp' command is required for /omp styles"); + fix = static_cast<FixOMP *>(lmp->modify->fix[ifix]); } /* ---------------------------------------------------------------------- */ ThrOMP::~ThrOMP() { - lmp->memory->destroy(eng_vdwl_thr); - lmp->memory->destroy(eng_coul_thr); - lmp->memory->destroy(eng_bond_thr); - lmp->memory->destroy(virial_thr); - lmp->memory->destroy(eatom_thr); - lmp->memory->destroy(vatom_thr); + // nothing to do? 
} -/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + Hook up per thread per atom arrays into the tally infrastructure + ---------------------------------------------------------------------- */ -void ThrOMP::ev_setup_acc_thr(int ntotal, int eflag_global, int vflag_global, - int eflag_atom, int vflag_atom, int nthreads) +void ThrOMP::ev_setup_thr(int eflag, int vflag, int nall, double *eatom, + double **vatom, ThrData *thr) { - int t,i; - - evflag_global = (eflag_global || vflag_global); - evflag_atom = (eflag_atom || vflag_atom); + const int tid = thr->get_tid(); - for (t = 0; t < nthreads; ++t) { + if (thr_style & THR_PAIR) { + if (eflag & 2) { + thr->eatom_pair = eatom + tid*nall; + memset(&(thr->eatom_pair[0]),0,nall*sizeof(double)); + } + if (vflag & 4) { + thr->vatom_pair = vatom + tid*nall; + memset(&(thr->vatom_pair[0][0]),0,nall*6*sizeof(double)); + } + } + + if (thr_style & THR_BOND) { + if (eflag & 2) { + thr->eatom_bond = eatom + tid*nall; + memset(&(thr->eatom_bond[0]),0,nall*sizeof(double)); + } + if (vflag & 4) { + thr->vatom_bond = vatom + tid*nall; + memset(&(thr->vatom_bond[0][0]),0,nall*6*sizeof(double)); + } + } - if (eflag_global) - eng_vdwl_thr[t] = eng_coul_thr[t] = eng_bond_thr[t] = 0.0; + if (thr_style & THR_ANGLE) { + if (eflag & 2) { + thr->eatom_angle = eatom + tid*nall; + memset(&(thr->eatom_angle[0]),0,nall*sizeof(double)); + } + if (vflag & 4) { + thr->vatom_angle = vatom + tid*nall; + memset(&(thr->vatom_angle[0][0]),0,nall*6*sizeof(double)); + } + } - if (vflag_global) - for (i = 0; i < 6; ++i) - virial_thr[t][i] = 0.0; + if (thr_style & THR_DIHEDRAL) { + if (eflag & 2) { + thr->eatom_dihed = eatom + tid*nall; + memset(&(thr->eatom_dihed[0]),0,nall*sizeof(double)); + } + if (vflag & 4) { + thr->vatom_dihed = vatom + tid*nall; + memset(&(thr->vatom_dihed[0][0]),0,nall*6*sizeof(double)); + } + } - if (eflag_atom) - for (i = 0; i < 
ntotal; ++i) - eatom_thr[t][i] = 0.0; - - if (vflag_atom) - for (i = 0; i < ntotal; ++i) { - vatom_thr[t][i][0] = 0.0; - vatom_thr[t][i][1] = 0.0; - vatom_thr[t][i][2] = 0.0; - vatom_thr[t][i][3] = 0.0; - vatom_thr[t][i][4] = 0.0; - vatom_thr[t][i][5] = 0.0; - } + if (thr_style & THR_IMPROPER) { + if (eflag & 2) { + thr->eatom_imprp = eatom + tid*nall; + memset(&(thr->eatom_imprp[0]),0,nall*sizeof(double)); + } + if (vflag & 4) { + thr->vatom_imprp = vatom + tid*nall; + memset(&(thr->vatom_imprp[0][0]),0,nall*6*sizeof(double)); + } } -} -/* ---------------------------------------------------------------------- */ +#if 0 /* not supported (yet) */ + if (thr_style & THR_KSPACE) { + if (eflag & 2) { + thr->eatom_kspce = eatom + tid*nall; + memset(&(thr->eatom_kspce[0]),0,nall*sizeof(double)); + } + if (vflag & 4) { + thr->vatom_kspce = vatom + tid*nall; + memset(&(thr->vatom_kspce[0][0]),0,nall*6*sizeof(double)); + } + } +#endif +} -void ThrOMP::ev_setup_thr(Dihedral *dihed) +/* ---------------------------------------------------------------------- + Reduce per thread data into the regular structures + Reduction of global properties is serialized with a "critical" + directive, so that only one thread at a time will access the + global variables. Since we are not synchronized, this should + come with little overhead. The reduction of per-atom properties + in contrast is parallelized over threads in the same way as forces. 
+ ---------------------------------------------------------------------- */ + +void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag, + ThrData *const thr, const int nproxy) { - int nthreads = lmp->comm->nthreads; + const int nlocal = lmp->atom->nlocal; + const int nghost = lmp->atom->nghost; + const int nall = nlocal + nghost; + const int nfirst = lmp->atom->nfirst; + const int nthreads = lmp->comm->nthreads; + const int evflag = eflag | vflag; + + const int tid = thr->get_tid(); + double **f = lmp->atom->f; + double **x = lmp->atom->x; + + switch (thr_style) { - // reallocate per-atom arrays if necessary - if (dihed->eflag_atom && lmp->atom->nmax > maxeatom_thr) { - maxeatom_thr = lmp->atom->nmax; - lmp->memory->grow(eatom_thr,nthreads,maxeatom_thr,"thr_omp:eatom_thr"); + case THR_PAIR: { + Pair * const pair = lmp->force->pair; + + if (pair->vflag_fdotr) { + if (lmp->neighbor->includegroup == 0) + thr->virial_fdotr_compute(x, nlocal, nghost, -1); + else + thr->virial_fdotr_compute(x, nlocal, nghost, nfirst); + } + + if (evflag) { +#if defined(_OPENMP) +#pragma omp critical +#endif + { + if (eflag & 1) { + pair->eng_vdwl += thr->eng_vdwl; + pair->eng_coul += thr->eng_coul; + thr->eng_vdwl = 0.0; + thr->eng_coul = 0.0; + } + if (vflag & 3) + for (int i=0; i < 6; ++i) { + pair->virial[i] += thr->virial_pair[i]; + thr->virial_pair[i] = 0.0; + } + } + if (eflag & 2) { + sync_threads(); + data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + sync_threads(); + data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid); + } + } } - if (dihed->vflag_atom && lmp->atom->nmax > maxvatom_thr) { - maxvatom_thr = lmp->atom->nmax; - lmp->memory->grow(vatom_thr,nthreads,maxeatom_thr,6,"thr_omp:vatom_thr"); + break; + + case THR_PAIR|THR_PROXY: { + Pair * const pair = lmp->force->pair; + + if (tid >= nproxy && pair->vflag_fdotr) { + if (lmp->neighbor->includegroup == 0) + thr->virial_fdotr_compute(x, nlocal, nghost, -1); + else + 
thr->virial_fdotr_compute(x, nlocal, nghost, nfirst); + } + + if (evflag) { +#if defined(_OPENMP) +#pragma omp critical +#endif + { + if (tid < nproxy) { + // nothing to do for kspace? + if (vflag & 3) + for (int i=0; i < 6; ++i) { + thr->virial_pair[i] = 0.0; + } + } else { + if (eflag & 1) { + pair->eng_vdwl += thr->eng_vdwl; + pair->eng_coul += thr->eng_coul; + thr->eng_vdwl = 0.0; + thr->eng_coul = 0.0; + } + if (vflag & 3) + for (int i=0; i < 6; ++i) { + pair->virial[i] += thr->virial_pair[i]; + thr->virial_pair[i] = 0.0; + } + } + } + if (eflag & 2) { + sync_threads(); + data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + sync_threads(); + data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid); + } + } } + break; - int ntotal = (lmp->force->newton_bond) ? - (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal; + case THR_BOND: - // set up per thread accumulators - ev_setup_acc_thr(ntotal, dihed->eflag_global, dihed->vflag_global, - dihed->eflag_atom, dihed->vflag_atom, nthreads); -} + if (evflag) { + Bond * const bond = lmp->force->bond; +#if defined(_OPENMP) +#pragma omp critical +#endif + { + bond->energy += thr->eng_bond; + for (int i=0; i < 6; ++i) + bond->virial[i] += thr->virial_bond[i]; + } + if (eflag & 2) { + sync_threads(); + data_reduce_thr(&(bond->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + sync_threads(); + data_reduce_thr(&(bond->vatom[0][0]), nall, nthreads, 6, tid); + } + } + break; -/* ---------------------------------------------------------------------- */ + case THR_ANGLE: -void ThrOMP::ev_setup_thr(Pair *pair) -{ - int nthreads = lmp->comm->nthreads; + if (evflag) { + Angle * const angle = lmp->force->angle; +#if defined(_OPENMP) +#pragma omp critical +#endif + { + angle->energy += thr->eng_angle; + for (int i=0; i < 6; ++i) + angle->virial[i] += thr->virial_angle[i]; + } + if (eflag & 2) { + sync_threads(); + data_reduce_thr(&(angle->eatom[0]), nall, nthreads, 1, tid); + } + 
if (vflag & 4) { + sync_threads(); + data_reduce_thr(&(angle->vatom[0][0]), nall, nthreads, 6, tid); + } + } + break; - // reallocate per-atom arrays if necessary - if (pair->eflag_atom && lmp->atom->nmax > maxeatom_thr) { - maxeatom_thr = lmp->atom->nmax; - lmp->memory->grow(eatom_thr,nthreads,maxeatom_thr,"thr_omp:eatom_thr"); - } - if (pair->vflag_atom && lmp->atom->nmax > maxvatom_thr) { - maxvatom_thr = lmp->atom->nmax; - lmp->memory->grow(vatom_thr,nthreads,maxeatom_thr,6,"thr_omp:vatom_thr"); - } + case THR_DIHEDRAL: + + if (evflag) { + Dihedral * const dihedral = lmp->force->dihedral; +#if defined(_OPENMP) +#pragma omp critical +#endif + { + dihedral->energy += thr->eng_dihed; + for (int i=0; i < 6; ++i) + dihedral->virial[i] += thr->virial_dihed[i]; + } + if (eflag & 2) { + sync_threads(); + data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + sync_threads(); + data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid); + } + } + break; - int ntotal = (lmp->force->newton) ? 
- (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal; + case THR_DIHEDRAL|THR_CHARMM: // special case for CHARMM dihedrals + + if (evflag) { + Dihedral * const dihedral = lmp->force->dihedral; + Pair * const pair = lmp->force->pair; +#if defined(_OPENMP) +#pragma omp critical +#endif + { + if (eflag & 1) { + dihedral->energy += thr->eng_dihed; + pair->eng_vdwl += thr->eng_vdwl; + pair->eng_coul += thr->eng_coul; + } + + if (vflag & 3) { + for (int i=0; i < 6; ++i) { + dihedral->virial[i] += thr->virial_dihed[i]; + pair->virial[i] += thr->virial_pair[i]; + } + } + } + if (eflag & 2) { + sync_threads(); + data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid); + data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + sync_threads(); + data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid); + data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid); + } + } + break; + + case THR_IMPROPER: + + if (evflag) { + Improper *improper = lmp->force->improper; +#if defined(_OPENMP) +#pragma omp critical +#endif + { + improper->energy += thr->eng_imprp; + for (int i=0; i < 6; ++i) + improper->virial[i] += thr->virial_imprp[i]; + } + if (eflag & 2) { + sync_threads(); + data_reduce_thr(&(improper->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + sync_threads(); + data_reduce_thr(&(improper->vatom[0][0]), nall, nthreads, 6, tid); + } + } + break; + + case THR_KSPACE|THR_PROXY: // fallthrough + case THR_KSPACE: + // nothing to do (for now) +#if 0 + if (evflag) { + KSpace *kspace = lmp->force->kspace; +#if defined(_OPENMP) +#pragma omp critical +#endif + { + kspace->energy += thr->eng_kspce; + for (int i=0; i < 6; ++i) + kspace->virial[i] += thr->virial_kspce[i]; + } + if (eflag & 2) { + sync_threads(); + data_reduce_thr(&(kspace->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + sync_threads(); + data_reduce_thr(&(kspace->vatom[0][0]), nall, nthreads, 6, tid); + } + } +#endif + break; - // set up 
per thread accumulators - ev_setup_acc_thr(ntotal, pair->eflag_global, pair->vflag_global, - pair->eflag_atom, pair->vflag_atom, nthreads); + default: + printf("tid:%d unhandled thr_style case %d\n", tid, thr_style); + break; + } + + if (style == fix->last_omp_style) { + sync_threads(); + data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid); + if (lmp->atom->torque) + data_reduce_thr(&(lmp->atom->torque[0][0]), nall, nthreads, 3, tid); + } } /* ---------------------------------------------------------------------- - reduce the per thread accumulated E/V data into the canonical accumulators. + tally eng_vdwl and eng_coul into per thread global and per-atom accumulators ------------------------------------------------------------------------- */ -void ThrOMP::ev_reduce_thr(Dihedral *dihed) + +void ThrOMP::e_tally_thr(Pair * const pair, const int i, const int j, + const int nlocal, const int newton_pair, + const double evdwl, const double ecoul, ThrData * const thr) { - int nthreads = lmp->comm->nthreads; - int ntotal = (lmp->force->newton_bond) ? 
- (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal; - - for (int n = 0; n < nthreads; ++n) { - dihed->energy += eng_bond_thr[n]; - if (dihed->vflag_either) { - dihed->virial[0] += virial_thr[n][0]; - dihed->virial[1] += virial_thr[n][1]; - dihed->virial[2] += virial_thr[n][2]; - dihed->virial[3] += virial_thr[n][3]; - dihed->virial[4] += virial_thr[n][4]; - dihed->virial[5] += virial_thr[n][5]; - if (dihed->vflag_atom) { - for (int i = 0; i < ntotal; ++i) { - dihed->vatom[i][0] += vatom_thr[n][i][0]; - dihed->vatom[i][1] += vatom_thr[n][i][1]; - dihed->vatom[i][2] += vatom_thr[n][i][2]; - dihed->vatom[i][3] += vatom_thr[n][i][3]; - dihed->vatom[i][4] += vatom_thr[n][i][4]; - dihed->vatom[i][5] += vatom_thr[n][i][5]; - } + if (pair->eflag_global) { + if (newton_pair) { + thr->eng_vdwl += evdwl; + thr->eng_coul += ecoul; + } else { + const double evdwlhalf = 0.5*evdwl; + const double ecoulhalf = 0.5*ecoul; + if (i < nlocal) { + thr->eng_vdwl += evdwlhalf; + thr->eng_coul += ecoulhalf; } - } - if (dihed->eflag_atom) { - for (int i = 0; i < ntotal; ++i) { - dihed->eatom[i] += eatom_thr[n][i]; + if (j < nlocal) { + thr->eng_vdwl += evdwlhalf; + thr->eng_coul += ecoulhalf; } } } + if (pair->eflag_atom) { + const double epairhalf = 0.5 * (evdwl + ecoul); + if (newton_pair || i < nlocal) thr->eatom_pair[i] += epairhalf; + if (newton_pair || j < nlocal) thr->eatom_pair[j] += epairhalf; + } +} + +/* helper functions */ +static void v_tally(double * const vout, const double * const vin) +{ + vout[0] += vin[0]; + vout[1] += vin[1]; + vout[2] += vin[2]; + vout[3] += vin[3]; + vout[4] += vin[4]; + vout[5] += vin[5]; +} + +static void v_tally(double * const vout, const double scale, const double * const vin) +{ + vout[0] += scale*vin[0]; + vout[1] += scale*vin[1]; + vout[2] += scale*vin[2]; + vout[3] += scale*vin[3]; + vout[4] += scale*vin[4]; + vout[5] += scale*vin[5]; } /* ---------------------------------------------------------------------- - reduce the per thread 
accumulated E/V data into the canonical accumulators. + tally virial into per thread global and per-atom accumulators ------------------------------------------------------------------------- */ -void ThrOMP::ev_reduce_thr(Pair *pair) +void ThrOMP::v_tally_thr(Pair * const pair, const int i, const int j, + const int nlocal, const int newton_pair, + const double * const v, ThrData * const thr) { - const int nthreads = lmp->comm->nthreads; - const int ntotal = (lmp->force->newton) ? - (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal; - - for (int n = 0; n < nthreads; ++n) { - pair->eng_vdwl += eng_vdwl_thr[n]; - pair->eng_coul += eng_coul_thr[n]; - if (pair->vflag_either) { - pair->virial[0] += virial_thr[n][0]; - pair->virial[1] += virial_thr[n][1]; - pair->virial[2] += virial_thr[n][2]; - pair->virial[3] += virial_thr[n][3]; - pair->virial[4] += virial_thr[n][4]; - pair->virial[5] += virial_thr[n][5]; - if (pair->vflag_atom) { - for (int i = 0; i < ntotal; ++i) { - pair->vatom[i][0] += vatom_thr[n][i][0]; - pair->vatom[i][1] += vatom_thr[n][i][1]; - pair->vatom[i][2] += vatom_thr[n][i][2]; - pair->vatom[i][3] += vatom_thr[n][i][3]; - pair->vatom[i][4] += vatom_thr[n][i][4]; - pair->vatom[i][5] += vatom_thr[n][i][5]; - } - } + if (pair->vflag_global) { + double * const va = thr->virial_pair; + if (newton_pair) { + v_tally(va,v); + } else { + if (i < nlocal) v_tally(va,0.5,v); + if (j < nlocal) v_tally(va,0.5,v); } - if (pair->eflag_atom) { - for (int i = 0; i < ntotal; ++i) { - pair->eatom[i] += eatom_thr[n][i]; - } + } + + if (pair->vflag_atom) { + if (newton_pair || i < nlocal) { + double * const va = thr->vatom_pair[i]; + v_tally(va,0.5,v); + } + if (newton_pair || j < nlocal) { + double * const va = thr->vatom_pair[j]; + v_tally(va,0.5,v); } } } @@ -232,39 +500,17 @@ void ThrOMP::ev_reduce_thr(Pair *pair) need i < nlocal test since called by bond_quartic and dihedral_charmm ------------------------------------------------------------------------- */ 
-void ThrOMP::ev_tally_thr(Pair *pair, int i, int j, int nlocal, - int newton_pair, double evdwl, double ecoul, - double fpair, double delx, double dely, - double delz, int tid) +void ThrOMP::ev_tally_thr(Pair * const pair, const int i, const int j, const int nlocal, + const int newton_pair, const double evdwl, const double ecoul, + const double fpair, const double delx, const double dely, + const double delz, ThrData * const thr) { - double evdwlhalf,ecoulhalf,epairhalf,v[6]; - if (pair->eflag_either) { - if (pair->eflag_global) { - if (newton_pair) { - eng_vdwl_thr[tid] += evdwl; - eng_coul_thr[tid] += ecoul; - } else { - evdwlhalf = 0.5*evdwl; - ecoulhalf = 0.5*ecoul; - if (i < nlocal) { - eng_vdwl_thr[tid] += evdwlhalf; - eng_coul_thr[tid] += ecoulhalf; - } - if (j < nlocal) { - eng_vdwl_thr[tid] += evdwlhalf; - eng_coul_thr[tid] += ecoulhalf; - } - } - } - if (pair->eflag_atom) { - epairhalf = 0.5 * (evdwl + ecoul); - if (newton_pair || i < nlocal) eatom_thr[tid][i] += epairhalf; - if (newton_pair || j < nlocal) eatom_thr[tid][j] += epairhalf; - } - } + if (pair->eflag_either) + e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr); if (pair->vflag_either) { + double v[6]; v[0] = delx*delx*fpair; v[1] = dely*dely*fpair; v[2] = delz*delz*fpair; @@ -272,52 +518,7 @@ void ThrOMP::ev_tally_thr(Pair *pair, int i, int j, int nlocal, v[4] = delx*delz*fpair; v[5] = dely*delz*fpair; - if (pair->vflag_global) { - if (newton_pair) { - virial_thr[tid][0] += v[0]; - virial_thr[tid][1] += v[1]; - virial_thr[tid][2] += v[2]; - virial_thr[tid][3] += v[3]; - virial_thr[tid][4] += v[4]; - virial_thr[tid][5] += v[5]; - } else { - if (i < nlocal) { - virial_thr[tid][0] += 0.5*v[0]; - virial_thr[tid][1] += 0.5*v[1]; - virial_thr[tid][2] += 0.5*v[2]; - virial_thr[tid][3] += 0.5*v[3]; - virial_thr[tid][4] += 0.5*v[4]; - virial_thr[tid][5] += 0.5*v[5]; - } - if (j < nlocal) { - virial_thr[tid][0] += 0.5*v[0]; - virial_thr[tid][1] += 0.5*v[1]; - virial_thr[tid][2] += 
0.5*v[2]; - virial_thr[tid][3] += 0.5*v[3]; - virial_thr[tid][4] += 0.5*v[4]; - virial_thr[tid][5] += 0.5*v[5]; - } - } - } - - if (pair->vflag_atom) { - if (newton_pair || i < nlocal) { - vatom_thr[tid][i][0] += 0.5*v[0]; - vatom_thr[tid][i][1] += 0.5*v[1]; - vatom_thr[tid][i][2] += 0.5*v[2]; - vatom_thr[tid][i][3] += 0.5*v[3]; - vatom_thr[tid][i][4] += 0.5*v[4]; - vatom_thr[tid][i][5] += 0.5*v[5]; - } - if (newton_pair || j < nlocal) { - vatom_thr[tid][j][0] += 0.5*v[0]; - vatom_thr[tid][j][1] += 0.5*v[1]; - vatom_thr[tid][j][2] += 0.5*v[2]; - vatom_thr[tid][j][3] += 0.5*v[3]; - vatom_thr[tid][j][4] += 0.5*v[4]; - vatom_thr[tid][j][5] += 0.5*v[5]; - } - } + v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr); } } @@ -326,39 +527,19 @@ void ThrOMP::ev_tally_thr(Pair *pair, int i, int j, int nlocal, for virial, have delx,dely,delz and fx,fy,fz ------------------------------------------------------------------------- */ -void ThrOMP::ev_tally_xyz_thr(Pair *pair, int i, int j, int nlocal, - int newton_pair, double evdwl, double ecoul, - double fx, double fy, double fz, - double delx, double dely, double delz, int tid) +void ThrOMP::ev_tally_xyz_thr(Pair * const pair, const int i, const int j, + const int nlocal, const int newton_pair, + const double evdwl, const double ecoul, + const double fx, const double fy, const double fz, + const double delx, const double dely, const double delz, + ThrData * const thr) { - double evdwlhalf,ecoulhalf,epairhalf,v[6]; - if (pair->eflag_either) { - if (pair->eflag_global) { - if (newton_pair) { - eng_vdwl_thr[tid] += evdwl; - eng_coul_thr[tid] += ecoul; - } else { - evdwlhalf = 0.5*evdwl; - ecoulhalf = 0.5*ecoul; - if (i < nlocal) { - eng_vdwl_thr[tid] += evdwlhalf; - eng_coul_thr[tid] += ecoulhalf; - } - if (j < nlocal) { - eng_vdwl_thr[tid] += evdwlhalf; - eng_coul_thr[tid] += ecoulhalf; - } - } - } - if (pair->eflag_atom) { - epairhalf = 0.5 * (evdwl + ecoul); - if (newton_pair || i < nlocal) eatom_thr[tid][i] += epairhalf; - 
if (newton_pair || j < nlocal) eatom_thr[tid][j] += epairhalf; - } - } + if (pair->eflag_either) + e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr); if (pair->vflag_either) { + double v[6]; v[0] = delx*fx; v[1] = dely*fy; v[2] = delz*fz; @@ -366,52 +547,7 @@ void ThrOMP::ev_tally_xyz_thr(Pair *pair, int i, int j, int nlocal, v[4] = delx*fz; v[5] = dely*fz; - if (pair->vflag_global) { - if (newton_pair) { - virial_thr[tid][0] += v[0]; - virial_thr[tid][1] += v[1]; - virial_thr[tid][2] += v[2]; - virial_thr[tid][3] += v[3]; - virial_thr[tid][4] += v[4]; - virial_thr[tid][5] += v[5]; - } else { - if (i < nlocal) { - virial_thr[tid][0] += 0.5*v[0]; - virial_thr[tid][1] += 0.5*v[1]; - virial_thr[tid][2] += 0.5*v[2]; - virial_thr[tid][3] += 0.5*v[3]; - virial_thr[tid][4] += 0.5*v[4]; - virial_thr[tid][5] += 0.5*v[5]; - } - if (j < nlocal) { - virial_thr[tid][0] += 0.5*v[0]; - virial_thr[tid][1] += 0.5*v[1]; - virial_thr[tid][2] += 0.5*v[2]; - virial_thr[tid][3] += 0.5*v[3]; - virial_thr[tid][4] += 0.5*v[4]; - virial_thr[tid][5] += 0.5*v[5]; - } - } - } - - if (pair->vflag_atom) { - if (newton_pair || i < nlocal) { - vatom_thr[tid][i][0] += 0.5*v[0]; - vatom_thr[tid][i][1] += 0.5*v[1]; - vatom_thr[tid][i][2] += 0.5*v[2]; - vatom_thr[tid][i][3] += 0.5*v[3]; - vatom_thr[tid][i][4] += 0.5*v[4]; - vatom_thr[tid][i][5] += 0.5*v[5]; - } - if (newton_pair || j < nlocal) { - vatom_thr[tid][j][0] += 0.5*v[0]; - vatom_thr[tid][j][1] += 0.5*v[1]; - vatom_thr[tid][j][2] += 0.5*v[2]; - vatom_thr[tid][j][3] += 0.5*v[3]; - vatom_thr[tid][j][4] += 0.5*v[4]; - vatom_thr[tid][j][5] += 0.5*v[5]; - } - } + v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr); } } @@ -421,25 +557,28 @@ void ThrOMP::ev_tally_xyz_thr(Pair *pair, int i, int j, int nlocal, virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk ------------------------------------------------------------------------- */ -void ThrOMP::ev_tally3_thr(Pair *pair, int i, int j, int k, double evdwl, 
double ecoul, - double *fj, double *fk, double *drji, double *drki, int tid) +void ThrOMP::ev_tally3_thr(Pair * const pair, const int i, const int j, const int k, + const double evdwl, const double ecoul, + const double * const fj, const double * const fk, + const double * const drji, const double * const drki, + ThrData * const thr) { - double epairthird,v[6]; - if (pair->eflag_either) { if (pair->eflag_global) { - eng_vdwl_thr[tid] += evdwl; - eng_coul_thr[tid] += ecoul; + thr->eng_vdwl += evdwl; + thr->eng_coul += ecoul; } if (pair->eflag_atom) { - epairthird = THIRD * (evdwl + ecoul); - eatom_thr[tid][i] += epairthird; - eatom_thr[tid][j] += epairthird; - eatom_thr[tid][k] += epairthird; + const double epairthird = THIRD * (evdwl + ecoul); + thr->eatom_pair[i] += epairthird; + thr->eatom_pair[j] += epairthird; + thr->eatom_pair[k] += epairthird; } } if (pair->vflag_either) { + double v[6]; + v[0] = drji[0]*fj[0] + drki[0]*fk[0]; v[1] = drji[1]*fj[1] + drki[1]*fk[1]; v[2] = drji[2]*fj[2] + drki[2]*fk[2]; @@ -447,21 +586,12 @@ void ThrOMP::ev_tally3_thr(Pair *pair, int i, int j, int k, double evdwl, double v[4] = drji[0]*fj[2] + drki[0]*fk[2]; v[5] = drji[1]*fj[2] + drki[1]*fk[2]; - if (pair->vflag_global) { - virial_thr[tid][0] += v[0]; - virial_thr[tid][1] += v[1]; - virial_thr[tid][2] += v[2]; - virial_thr[tid][3] += v[3]; - virial_thr[tid][4] += v[4]; - virial_thr[tid][5] += v[5]; - } + if (pair->vflag_global) v_tally(thr->virial_pair,v); if (pair->vflag_atom) { - for (int n=0; n < 6; ++n) { - vatom_thr[tid][i][n] += THIRD*v[n]; - vatom_thr[tid][j][n] += THIRD*v[n]; - vatom_thr[tid][k][n] += THIRD*v[n]; - } + v_tally(thr->vatom_pair[i],THIRD,v); + v_tally(thr->vatom_pair[j],THIRD,v); + v_tally(thr->vatom_pair[k],THIRD,v); } } } @@ -471,20 +601,23 @@ void ThrOMP::ev_tally3_thr(Pair *pair, int i, int j, int k, double evdwl, double called by AIREBO potential, newton_pair is always on ------------------------------------------------------------------------- */ 
-void ThrOMP::ev_tally4_thr(Pair *pair, int i, int j, int k, int m, double evdwl, - double *fi, double *fj, double *fk, - double *drim, double *drjm, double *drkm,int tid) +void ThrOMP::ev_tally4_thr(Pair * const pair, const int i, const int j, + const int k, const int m, const double evdwl, + const double * const fi, const double * const fj, + const double * const fk, const double * const drim, + const double * const drjm, const double * const drkm, + ThrData * const thr) { - double epairfourth,v[6]; + double v[6]; if (pair->eflag_either) { - if (pair->eflag_global) eng_vdwl_thr[tid] += evdwl; + if (pair->eflag_global) thr->eng_vdwl += evdwl; if (pair->eflag_atom) { - epairfourth = 0.25 * evdwl; - eatom_thr[tid][i] += epairfourth; - eatom_thr[tid][j] += epairfourth; - eatom_thr[tid][k] += epairfourth; - eatom_thr[tid][m] += epairfourth; + const double epairfourth = 0.25 * evdwl; + thr->eatom_pair[i] += epairfourth; + thr->eatom_pair[j] += epairfourth; + thr->eatom_pair[k] += epairfourth; + thr->eatom_pair[m] += epairfourth; } } @@ -496,14 +629,10 @@ void ThrOMP::ev_tally4_thr(Pair *pair, int i, int j, int k, int m, double evdwl, v[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]); v[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]); - vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2]; - vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5]; - vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2]; - vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5]; - vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1]; vatom_thr[tid][k][2] += v[2]; - vatom_thr[tid][k][3] += v[3]; vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5]; - vatom_thr[tid][m][0] += v[0]; vatom_thr[tid][m][1] += v[1]; vatom_thr[tid][m][2] += v[2]; - vatom_thr[tid][m][3] += v[3]; vatom_thr[tid][m][4] += v[4]; 
vatom_thr[tid][m][5] += v[5]; + v_tally(thr->vatom_pair[i],v); + v_tally(thr->vatom_pair[j],v); + v_tally(thr->vatom_pair[k],v); + v_tally(thr->vatom_pair[m],v); } } @@ -513,48 +642,248 @@ void ThrOMP::ev_tally4_thr(Pair *pair, int i, int j, int k, int m, double evdwl, changes v values by dividing by n ------------------------------------------------------------------------- */ -void ThrOMP::ev_tally_list_thr(Pair *pair, int n, int *list, double ecoul, double *v, int tid) +void ThrOMP::ev_tally_list_thr(Pair * const pair, const int n, + const int * const list, const double ecoul, + const double * const v, ThrData * const thr) { - int i,j; - if (pair->eflag_either) { - if (pair->eflag_global) eng_coul_thr[tid] += ecoul; + if (pair->eflag_global) thr->eng_coul += ecoul; if (pair->eflag_atom) { - double epairatom = ecoul/n; - for (i = 0; i < n; i++) eatom_thr[tid][list[i]] += epairatom; + double epairatom = ecoul/static_cast<double>(n); + for (int i = 0; i < n; i++) thr->eatom_pair[list[i]] += epairatom; } } if (pair->vflag_either) { - if (pair->vflag_global) { - virial_thr[tid][0] += v[0]; - virial_thr[tid][1] += v[1]; - virial_thr[tid][2] += v[2]; - virial_thr[tid][3] += v[3]; - virial_thr[tid][4] += v[4]; - virial_thr[tid][5] += v[5]; - } + if (pair->vflag_global) + v_tally(thr->virial_pair,v); if (pair->vflag_atom) { - v[0] /= n; - v[1] /= n; - v[2] /= n; - v[3] /= n; - v[4] /= n; - v[5] /= n; - for (i = 0; i < n; i++) { - j = list[i]; - vatom_thr[tid][j][0] += v[0]; - vatom_thr[tid][j][1] += v[1]; - vatom_thr[tid][j][2] += v[2]; - vatom_thr[tid][j][3] += v[3]; - vatom_thr[tid][j][4] += v[4]; - vatom_thr[tid][j][5] += v[5]; + const double s = 1.0/static_cast<double>(n); + double vtmp[6]; + + vtmp[0] = s * v[0]; + vtmp[1] = s * v[1]; + vtmp[2] = s * v[2]; + vtmp[3] = s * v[3]; + vtmp[4] = s * v[4]; + vtmp[5] = s * v[5]; + + for (int i = 0; i < n; i++) { + const int j = list[i]; + v_tally(thr->vatom_pair[j],vtmp); + } + } + } +} + +/* 
---------------------------------------------------------------------- + tally bond energy and virial into the global and per-atom accumulators of thr; with newton_bond off only atoms with index < nlocal are tallied +------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally_thr(Bond * const bond, const int i, const int j, const int nlocal, + const int newton_bond, const double ebond, const double fbond, + const double delx, const double dely, const double delz, + ThrData * const thr) +{ + if (bond->eflag_either) { + const double ebondhalf = 0.5*ebond; + if (newton_bond) { + if (bond->eflag_global) + thr->eng_bond += ebond; + if (bond->eflag_atom) { + thr->eatom_bond[i] += ebondhalf; + thr->eatom_bond[j] += ebondhalf; + } + } else { + if (bond->eflag_global) { + if (i < nlocal) thr->eng_bond += ebondhalf; + if (j < nlocal) thr->eng_bond += ebondhalf; + } + if (bond->eflag_atom) { + if (i < nlocal) thr->eatom_bond[i] += ebondhalf; + if (j < nlocal) thr->eatom_bond[j] += ebondhalf; + } + } + } + + if (bond->vflag_either) { + double v[6]; + + v[0] = delx*delx*fbond; + v[1] = dely*dely*fbond; + v[2] = delz*delz*fbond; + v[3] = delx*dely*fbond; + v[4] = delx*delz*fbond; + v[5] = dely*delz*fbond; + + if (bond->vflag_global) { + if (newton_bond) + v_tally(thr->virial_bond,v); + else { + if (i < nlocal) + v_tally(thr->virial_bond,0.5,v); + if (j < nlocal) + v_tally(thr->virial_bond,0.5,v); + } + } + + if (bond->vflag_atom) { + v[0] *= 0.5; + v[1] *= 0.5; + v[2] *= 0.5; + v[3] *= 0.5; + v[4] *= 0.5; + v[5] *= 0.5; + + if (newton_bond) { + v_tally(thr->vatom_bond[i],v); + v_tally(thr->vatom_bond[j],v); + } else { + if (i < nlocal) /* guard on i: this half goes to atom i, as in the eatom_bond path */ + v_tally(thr->vatom_bond[i],v); + if (j < nlocal) + v_tally(thr->vatom_bond[j],v); + } + } + } +} + +/* ---------------------------------------------------------------------- + tally energy and virial into global and per-atom accumulators + virial = r1F1 + r2F2 + r3F3 = (r1-r2) F1 + (r3-r2) F3 = del1*f1 + del2*f3
+------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally_thr(Angle * const angle, const int i, const int j, const int k, + const int nlocal, const int newton_bond, const double eangle, + const double * const f1, const double * const f3, + const double delx1, const double dely1, const double delz1, + const double delx2, const double dely2, const double delz2, + ThrData * const thr) +{ + if (angle->eflag_either) { + const double eanglethird = THIRD*eangle; + if (newton_bond) { + if (angle->eflag_global) + thr->eng_angle += eangle; + if (angle->eflag_atom) { + thr->eatom_angle[i] += eanglethird; + thr->eatom_angle[j] += eanglethird; + thr->eatom_angle[k] += eanglethird; } + } else { + if (angle->eflag_global) { + if (i < nlocal) thr->eng_angle += eanglethird; + if (j < nlocal) thr->eng_angle += eanglethird; + if (k < nlocal) thr->eng_angle += eanglethird; + } + if (angle->eflag_atom) { + if (i < nlocal) thr->eatom_angle[i] += eanglethird; + if (j < nlocal) thr->eatom_angle[j] += eanglethird; + if (k < nlocal) thr->eatom_angle[k] += eanglethird; + } + } + } + + if (angle->vflag_either) { + double v[6]; + + v[0] = delx1*f1[0] + delx2*f3[0]; + v[1] = dely1*f1[1] + dely2*f3[1]; + v[2] = delz1*f1[2] + delz2*f3[2]; + v[3] = delx1*f1[1] + delx2*f3[1]; + v[4] = delx1*f1[2] + delx2*f3[2]; + v[5] = dely1*f1[2] + dely2*f3[2]; + + if (angle->vflag_global) { + if (newton_bond) { + v_tally(thr->virial_angle,v); + } else { + int cnt = 0; + if (i < nlocal) ++cnt; + if (j < nlocal) ++cnt; + if (k < nlocal) ++cnt; + v_tally(thr->virial_angle,cnt*THIRD,v); + } + } + + if (angle->vflag_atom) { + v[0] *= THIRD; + v[1] *= THIRD; + v[2] *= THIRD; + v[3] *= THIRD; + v[4] *= THIRD; + v[5] *= THIRD; + + if (newton_bond) { + v_tally(thr->vatom_angle[i],v); + v_tally(thr->vatom_angle[j],v); + v_tally(thr->vatom_angle[k],v); + } else { + if (i < nlocal) v_tally(thr->vatom_angle[i],v); /* guard on i: this third goes to atom i, as in the eatom_angle path */ + if (j < nlocal) v_tally(thr->vatom_angle[j],v); + if (k < nlocal) 
v_tally(thr->vatom_angle[k],v); + } + } + } +} + +/* ---------------------------------------------------------------------- + tally energy and virial from 1-3 repulsion of SDK angle into accumulators +------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally13_thr(Angle * const angle, const int i1, const int i3, + const int nlocal, const int newton_bond, + const double epair, const double fpair, + const double delx, const double dely, + const double delz, ThrData * const thr) +{ + + if (angle->eflag_either) { + const double epairhalf = 0.5 * epair; + + if (angle->eflag_global) { + if (newton_bond || i1 < nlocal) + thr->eng_angle += epairhalf; + if (newton_bond || i3 < nlocal) + thr->eng_angle += epairhalf; + } + + if (angle->eflag_atom) { + if (newton_bond || i1 < nlocal) thr->eatom_angle[i1] += epairhalf; + if (newton_bond || i3 < nlocal) thr->eatom_angle[i3] += epairhalf; } } + + if (angle->vflag_either) { + double v[6]; + v[0] = delx*delx*fpair; + v[1] = dely*dely*fpair; + v[2] = delz*delz*fpair; + v[3] = delx*dely*fpair; + v[4] = delx*delz*fpair; + v[5] = dely*delz*fpair; + + if (angle->vflag_global) { + double * const va = thr->virial_angle; + if (newton_bond || i1 < nlocal) v_tally(va,0.5,v); + if (newton_bond || i3 < nlocal) v_tally(va,0.5,v); + } + + if (angle->vflag_atom) { + if (newton_bond || i1 < nlocal) { + double * const va = thr->vatom_angle[i1]; + v_tally(va,0.5,v); + } + if (newton_bond || i3 < nlocal) { + double * const va = thr->vatom_angle[i3]; + v_tally(va,0.5,v); + } + } + } } + /* ---------------------------------------------------------------------- tally energy and virial into global and per-atom accumulators virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4 @@ -562,40 +891,48 @@ void ThrOMP::ev_tally_list_thr(Pair *pair, int n, int *list, double ecoul, doubl = vb1*f1 + vb2*f3 + (vb3+vb2)*f4 ------------------------------------------------------------------------- */ -void 
ThrOMP::ev_tally_thr(Dihedral *dihed, int i1, int i2, int i3, int i4, - int nlocal, int newton_bond, - double edihedral, double *f1, double *f3, double *f4, - double vb1x, double vb1y, double vb1z, - double vb2x, double vb2y, double vb2z, - double vb3x, double vb3y, double vb3z, int tid) +void ThrOMP::ev_tally_thr(Dihedral * const dihed, const int i1, const int i2, + const int i3, const int i4, const int nlocal, + const int newton_bond, const double edihedral, + const double * const f1, const double * const f3, + const double * const f4, const double vb1x, + const double vb1y, const double vb1z, const double vb2x, + const double vb2y, const double vb2z, const double vb3x, + const double vb3y, const double vb3z, ThrData * const thr) { - double edihedralquarter,v[6]; - int cnt; if (dihed->eflag_either) { if (dihed->eflag_global) { if (newton_bond) { - eng_bond_thr[tid] += edihedral; + thr->eng_dihed += edihedral; } else { - edihedralquarter = 0.25*edihedral; - cnt = 0; + const double edihedralquarter = 0.25*edihedral; + int cnt = 0; if (i1 < nlocal) ++cnt; if (i2 < nlocal) ++cnt; if (i3 < nlocal) ++cnt; if (i4 < nlocal) ++cnt; - eng_bond_thr[tid] += static_cast<double>(cnt) * edihedralquarter; + thr->eng_dihed += static_cast<double>(cnt)*edihedralquarter; } } if (dihed->eflag_atom) { - edihedralquarter = 0.25*edihedral; - if (newton_bond || i1 < nlocal) eatom_thr[tid][i1] += edihedralquarter; - if (newton_bond || i2 < nlocal) eatom_thr[tid][i2] += edihedralquarter; - if (newton_bond || i3 < nlocal) eatom_thr[tid][i3] += edihedralquarter; - if (newton_bond || i4 < nlocal) eatom_thr[tid][i4] += edihedralquarter; + const double edihedralquarter = 0.25*edihedral; + if (newton_bond) { + thr->eatom_dihed[i1] += edihedralquarter; + thr->eatom_dihed[i2] += edihedralquarter; + thr->eatom_dihed[i3] += edihedralquarter; + thr->eatom_dihed[i4] += edihedralquarter; + } else { + if (i1 < nlocal) thr->eatom_dihed[i1] += edihedralquarter; + if (i2 < nlocal) thr->eatom_dihed[i2] += 
edihedralquarter; + if (i3 < nlocal) thr->eatom_dihed[i3] += edihedralquarter; + if (i4 < nlocal) thr->eatom_dihed[i4] += edihedralquarter; + } } } if (dihed->vflag_either) { + double v[6]; v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0]; v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1]; v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2]; @@ -605,80 +942,127 @@ void ThrOMP::ev_tally_thr(Dihedral *dihed, int i1, int i2, int i3, int i4, if (dihed->vflag_global) { if (newton_bond) { - virial_thr[tid][0] += v[0]; - virial_thr[tid][1] += v[1]; - virial_thr[tid][2] += v[2]; - virial_thr[tid][3] += v[3]; - virial_thr[tid][4] += v[4]; - virial_thr[tid][5] += v[5]; + v_tally(thr->virial_dihed,v); } else { - if (i1 < nlocal) { - virial_thr[tid][0] += 0.25*v[0]; - virial_thr[tid][1] += 0.25*v[1]; - virial_thr[tid][2] += 0.25*v[2]; - virial_thr[tid][3] += 0.25*v[3]; - virial_thr[tid][4] += 0.25*v[4]; - virial_thr[tid][5] += 0.25*v[5]; - } - if (i2 < nlocal) { - virial_thr[tid][0] += 0.25*v[0]; - virial_thr[tid][1] += 0.25*v[1]; - virial_thr[tid][2] += 0.25*v[2]; - virial_thr[tid][3] += 0.25*v[3]; - virial_thr[tid][4] += 0.25*v[4]; - virial_thr[tid][5] += 0.25*v[5]; - } - if (i3 < nlocal) { - virial_thr[tid][0] += 0.25*v[0]; - virial_thr[tid][1] += 0.25*v[1]; - virial_thr[tid][2] += 0.25*v[2]; - virial_thr[tid][3] += 0.25*v[3]; - virial_thr[tid][4] += 0.25*v[4]; - virial_thr[tid][5] += 0.25*v[5]; - } - if (i4 < nlocal) { - virial_thr[tid][0] += 0.25*v[0]; - virial_thr[tid][1] += 0.25*v[1]; - virial_thr[tid][2] += 0.25*v[2]; - virial_thr[tid][3] += 0.25*v[3]; - virial_thr[tid][4] += 0.25*v[4]; - virial_thr[tid][5] += 0.25*v[5]; - } + int cnt = 0; + if (i1 < nlocal) ++cnt; + if (i2 < nlocal) ++cnt; + if (i3 < nlocal) ++cnt; + if (i4 < nlocal) ++cnt; + v_tally(thr->virial_dihed,0.25*static_cast<double>(cnt),v); } } + v[0] *= 0.25; + v[1] *= 0.25; + v[2] *= 0.25; + v[3] *= 0.25; + v[4] *= 0.25; + v[5] *= 0.25; + if (dihed->vflag_atom) { - if (newton_bond || i1 < nlocal) { - 
vatom_thr[tid][i1][0] += 0.25*v[0]; - vatom_thr[tid][i1][1] += 0.25*v[1]; - vatom_thr[tid][i1][2] += 0.25*v[2]; - vatom_thr[tid][i1][3] += 0.25*v[3]; - vatom_thr[tid][i1][4] += 0.25*v[4]; - vatom_thr[tid][i1][5] += 0.25*v[5]; - } - if (newton_bond || i2 < nlocal) { - vatom_thr[tid][i2][0] += 0.25*v[0]; - vatom_thr[tid][i2][1] += 0.25*v[1]; - vatom_thr[tid][i2][2] += 0.25*v[2]; - vatom_thr[tid][i2][3] += 0.25*v[3]; - vatom_thr[tid][i2][4] += 0.25*v[4]; - vatom_thr[tid][i2][5] += 0.25*v[5]; + if (newton_bond) { + v_tally(thr->vatom_dihed[i1],v); + v_tally(thr->vatom_dihed[i2],v); + v_tally(thr->vatom_dihed[i3],v); + v_tally(thr->vatom_dihed[i4],v); + } else { + if (i1 < nlocal) v_tally(thr->vatom_dihed[i1],v); + if (i2 < nlocal) v_tally(thr->vatom_dihed[i2],v); + if (i3 < nlocal) v_tally(thr->vatom_dihed[i3],v); + if (i4 < nlocal) v_tally(thr->vatom_dihed[i4],v); } - if (newton_bond || i3 < nlocal) { - vatom_thr[tid][i3][0] += 0.25*v[0]; - vatom_thr[tid][i3][1] += 0.25*v[1]; - vatom_thr[tid][i3][2] += 0.25*v[2]; - vatom_thr[tid][i3][3] += 0.25*v[3]; - vatom_thr[tid][i3][4] += 0.25*v[4]; - vatom_thr[tid][i3][5] += 0.25*v[5]; - } - if (newton_bond || i4 < nlocal) { - vatom_thr[tid][i4][0] += 0.25*v[0]; - vatom_thr[tid][i4][1] += 0.25*v[1]; - vatom_thr[tid][i4][2] += 0.25*v[2]; - vatom_thr[tid][i4][3] += 0.25*v[3]; - vatom_thr[tid][i4][4] += 0.25*v[4]; - vatom_thr[tid][i4][5] += 0.25*v[5]; + } + } +} + +/* ---------------------------------------------------------------------- + tally energy and virial into global and per-atom accumulators + virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4 + = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4 + = vb1*f1 + vb2*f3 + (vb3+vb2)*f4 +------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally_thr(Improper * const imprp, const int i1, const int i2, + const int i3, const int i4, const int nlocal, + const int newton_bond, const double eimproper, + const double * const 
f1, const double * const f3, + const double * const f4, const double vb1x, + const double vb1y, const double vb1z, const double vb2x, + const double vb2y, const double vb2z, const double vb3x, + const double vb3y, const double vb3z, ThrData * const thr) +{ + + if (imprp->eflag_either) { + if (imprp->eflag_global) { + if (newton_bond) { + thr->eng_imprp += eimproper; + } else { + const double eimproperquarter = 0.25*eimproper; + int cnt = 0; + if (i1 < nlocal) ++cnt; + if (i2 < nlocal) ++cnt; + if (i3 < nlocal) ++cnt; + if (i4 < nlocal) ++cnt; + thr->eng_imprp += static_cast<double>(cnt)*eimproperquarter; + } + } + if (imprp->eflag_atom) { + const double eimproperquarter = 0.25*eimproper; + if (newton_bond) { + thr->eatom_imprp[i1] += eimproperquarter; + thr->eatom_imprp[i2] += eimproperquarter; + thr->eatom_imprp[i3] += eimproperquarter; + thr->eatom_imprp[i4] += eimproperquarter; + } else { + if (i1 < nlocal) thr->eatom_imprp[i1] += eimproperquarter; + if (i2 < nlocal) thr->eatom_imprp[i2] += eimproperquarter; + if (i3 < nlocal) thr->eatom_imprp[i3] += eimproperquarter; + if (i4 < nlocal) thr->eatom_imprp[i4] += eimproperquarter; + } + } + } + + if (imprp->vflag_either) { + double v[6]; + v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0]; + v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1]; + v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2]; + v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1]; + v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2]; + v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2]; + + if (imprp->vflag_global) { + if (newton_bond) { + v_tally(thr->virial_imprp,v); + } else { + int cnt = 0; + if (i1 < nlocal) ++cnt; + if (i2 < nlocal) ++cnt; + if (i3 < nlocal) ++cnt; + if (i4 < nlocal) ++cnt; + v_tally(thr->virial_imprp,0.25*static_cast<double>(cnt),v); + } + } + + v[0] *= 0.25; + v[1] *= 0.25; + v[2] *= 0.25; + v[3] *= 0.25; + v[4] *= 0.25; + v[5] *= 0.25; + + if (imprp->vflag_atom) { + if (newton_bond) { + v_tally(thr->vatom_imprp[i1],v); + 
v_tally(thr->vatom_imprp[i2],v); + v_tally(thr->vatom_imprp[i3],v); + v_tally(thr->vatom_imprp[i4],v); + } else { + if (i1 < nlocal) v_tally(thr->vatom_imprp[i1],v); + if (i2 < nlocal) v_tally(thr->vatom_imprp[i2],v); + if (i3 < nlocal) v_tally(thr->vatom_imprp[i3],v); + if (i4 < nlocal) v_tally(thr->vatom_imprp[i4],v); } } } @@ -690,7 +1074,8 @@ void ThrOMP::ev_tally_thr(Dihedral *dihed, int i1, int i2, int i3, int i4, fpair is magnitude of force on atom I ------------------------------------------------------------------------- */ -void ThrOMP::v_tally2_thr(int i, int j, double fpair, double *drij, int tid) +void ThrOMP::v_tally2_thr(const int i, const int j, const double fpair, + const double * const drij, ThrData * const thr) { double v[6]; @@ -701,10 +1086,8 @@ void ThrOMP::v_tally2_thr(int i, int j, double fpair, double *drij, int tid) v[4] = 0.5 * drij[0]*drij[2]*fpair; v[5] = 0.5 * drij[1]*drij[2]*fpair; - vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2]; - vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5]; - vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2]; - vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5]; + v_tally(thr->vatom_pair[i],v); + v_tally(thr->vatom_pair[j],v); } /* ---------------------------------------------------------------------- @@ -712,8 +1095,10 @@ void ThrOMP::v_tally2_thr(int i, int j, double fpair, double *drij, int tid) called by AIREBO and Tersoff potential, newton_pair is always on ------------------------------------------------------------------------- */ -void ThrOMP::v_tally3_thr(int i, int j, int k, double *fi, double *fj, - double *drik, double *drjk, int tid) +void ThrOMP::v_tally3_thr(const int i, const int j, const int k, + const double * const fi, const double * const fj, + const double * const drik, const double * const drjk, + ThrData * const thr) { double v[6]; 
@@ -724,12 +1109,9 @@ void ThrOMP::v_tally3_thr(int i, int j, int k, double *fi, double *fj, v[4] = THIRD * (drik[0]*fi[2] + drjk[0]*fj[2]); v[5] = THIRD * (drik[1]*fi[2] + drjk[1]*fj[2]); - vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2]; - vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5]; - vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2]; - vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5]; - vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1]; vatom_thr[tid][k][2] += v[2]; - vatom_thr[tid][k][3] += v[3]; vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5]; + v_tally(thr->vatom_pair[i],v); + v_tally(thr->vatom_pair[j],v); + v_tally(thr->vatom_pair[k],v); } /* ---------------------------------------------------------------------- @@ -737,9 +1119,11 @@ void ThrOMP::v_tally3_thr(int i, int j, int k, double *fi, double *fj, called by AIREBO potential, newton_pair is always on ------------------------------------------------------------------------- */ -void ThrOMP::v_tally4_thr(int i, int j, int k, int m, - double *fi, double *fj, double *fk, - double *drim, double *drjm, double *drkm, int tid) +void ThrOMP::v_tally4_thr(const int i, const int j, const int k, const int m, + const double * const fi, const double * const fj, + const double * const fk, const double * const drim, + const double * const drjm, const double * const drkm, + ThrData * const thr) { double v[6]; @@ -750,84 +1134,17 @@ void ThrOMP::v_tally4_thr(int i, int j, int k, int m, v[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]); v[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]); - vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2]; - vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5]; - vatom_thr[tid][j][0] += v[0]; 
vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2]; - vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5]; - vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1]; vatom_thr[tid][k][2] += v[2]; - vatom_thr[tid][k][3] += v[3]; vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5]; - vatom_thr[tid][m][0] += v[0]; vatom_thr[tid][m][1] += v[1]; vatom_thr[tid][m][2] += v[2]; - vatom_thr[tid][m][3] += v[3]; vatom_thr[tid][m][4] += v[4]; vatom_thr[tid][m][5] += v[5]; -} - -/* ---------------------------------------------------------------------- */ - -// set loop range thread id, and force array offset for threaded runs. -double **ThrOMP::loop_setup_thr(double **f, int &ifrom, int &ito, int &tid, - int inum, int nall, int nthreads) -{ -#if defined(_OPENMP) - tid = omp_get_thread_num(); - - // each thread works on a fixed chunk of atoms. - const int idelta = 1 + inum/nthreads; - ifrom = tid*idelta; - ito = ifrom + idelta; - if (ito > inum) - ito = inum; - - return f + nall*tid; -#else - tid = 0; - ifrom = 0; - ito = inum; - return f; -#endif -} - -/* ---------------------------------------------------------------------- */ - -// reduce per thread data into the first part of the data -// array that is used for the non-threaded parts and reset -// the temporary storage to 0.0. this routine depends on -// multi-dimensional arrays like force stored in this order -// x1,y1,z1,x2,y2,z2,... -// we need to post a barrier to wait until all threads are done -// with writing to the array . -void ThrOMP::data_reduce_thr(double *dall, int nall, int nthreads, - int ndim, int tid) -{ -#if defined(_OPENMP) - // NOOP in non-threaded execution. - if (nthreads == 1) return; -#pragma omp barrier - { - const int nvals = ndim*nall; - const int idelta = nvals/nthreads + 1; - const int ifrom = tid*idelta; - const int ito = ((ifrom + idelta) > nvals) ? 
nvals : (ifrom + idelta); - - for (int m = ifrom; m < ito; ++m) { - for (int n = 1; n < nthreads; ++n) { - dall[m] += dall[n*nvals + m]; - dall[n*nvals + m] = 0.0; - } - } - } -#else - // NOOP in non-threaded execution. - return; -#endif + v_tally(thr->vatom_pair[i],v); + v_tally(thr->vatom_pair[j],v); + v_tally(thr->vatom_pair[k],v); + v_tally(thr->vatom_pair[m],v); } /* ---------------------------------------------------------------------- */ double ThrOMP::memory_usage_thr() { - const int nthreads=lmp->comm->nthreads; - - double bytes = nthreads * (3 + 7) * sizeof(double); - bytes += nthreads * maxeatom_thr * sizeof(double); - bytes += nthreads * maxvatom_thr * 6 * sizeof(double); + double bytes=0.0; + return bytes; } diff --git a/src/USER-OMP/thr_omp.h b/src/USER-OMP/thr_omp.h index 9966c9de00..32f7045124 100644 --- a/src/USER-OMP/thr_omp.h +++ b/src/USER-OMP/thr_omp.h @@ -19,39 +19,28 @@ #define LMP_THR_OMP_H #include "pointers.h" +#include "fix_omp.h" +#include "thr_data.h" namespace LAMMPS_NS { // forward declarations class Pair; +class Bond; +class Angle; class Dihedral; +class Improper; +class KSpace; +class Fix; class ThrOMP { - public: - struct global { - double eng_vdwl; - double eng_coul; - double eng_bond; - double virial[6]; - }; protected: - const int thr_style; - enum {PAIR=1, BOND, ANGLE, DIHEDRAL, IMPROPER, KSPACE, FIX, COMPUTE}; - - LAMMPS *lmp; // reference to base lammps object. - - double *eng_vdwl_thr; // per thread accumulated vdw energy - double *eng_coul_thr; // per thread accumulated coulomb energies - double *eng_bond_thr; // per thread accumlated bonded energy + LAMMPS *lmp; // reference to base lammps object. 
+ FixOMP *fix; // pointer to fix_omp; - double **virial_thr; // per thread virial - double **eatom_thr; // per thread per atom energy - double ***vatom_thr; // per thread per atom virial + const int thr_style; - int maxeatom_thr, maxvatom_thr; - int evflag_global, evflag_atom; - public: ThrOMP(LAMMPS *, int); virtual ~ThrOMP(); @@ -65,50 +54,105 @@ class ThrOMP { { ; } }; + enum {THR_NONE=0,THR_PAIR=1,THR_BOND=1<<1,THR_ANGLE=1<<2, + THR_DIHEDRAL=1<<3,THR_IMPROPER=1<<4,THR_KSPACE=1<<5, + THR_CHARMM=1<<6,THR_PROXY=1<<7,THR_HYBRID=1<<8,THR_FIX=1<<9}; + protected: - // extra ev_tally work for threaded styles - void ev_setup_thr(Pair *); - void ev_setup_thr(Dihedral *); + // extra ev_tally setup work for threaded styles + void ev_setup_thr(int, int, int, double *, double **, ThrData *); - void ev_reduce_thr(Pair *); - void ev_reduce_thr(Dihedral *); + // compute global per thread virial contribution from per-thread force + void virial_fdotr_compute_thr(double * const, const double * const * const, + const double * const * const, + const int, const int, const int); - private: - // internal method to be used by multiple ev_setup_thr() methods - void ev_setup_acc_thr(int, int, int, int, int, int); + // reduce per thread data as needed + void reduce_thr(void * const style, const int eflag, const int vflag, ThrData * const thr, const int nproxy=0); protected: + // threading adapted versions of the ev_tally infrastructure // style specific versions (need access to style class flags) - void ev_tally_thr(Pair *, int, int, int, int, double, double, - double, double, double, double, int); - void ev_tally_xyz_thr(Pair *, int, int, int, int, double, double, - double, double, double, double, double, double, int); - void ev_tally3_thr(Pair *, int, int, int, double, double, - double *, double *, double *, double *, int); - void ev_tally4_thr(Pair *, int, int, int, int, double, - double *, double *, double *, - double *, double *, double *, int); - void ev_tally_list_thr(Pair *, int, 
int *, double , double *, int); - - void ev_tally_thr(Dihedral *, int, int, int, int, int, int, double, - double *, double *, double *, double, double, double, - double, double, double, double, double, double, int); - // style independent versions - void v_tally2_thr(int, int, double, double *, int); - void v_tally3_thr(int, int, int, double *, double *, double *, double *, int); - void v_tally4_thr(int, int, int, int, double *, double *, double *, - double *, double *, double *, int); + // Pair + void e_tally_thr(Pair * const, const int, const int, const int, + const int, const double, const double, ThrData * const); + void v_tally_thr(Pair * const, const int, const int, const int, + const int, const double * const, ThrData * const); + + void ev_tally_thr(Pair * const, const int, const int, const int, const int, + const double, const double, const double, const double, + const double, const double, ThrData * const); + void ev_tally_xyz_thr(Pair * const, const int, const int, const int, + const int, const double, const double, const double, + const double, const double, const double, + const double, const double, ThrData * const); + void ev_tally3_thr(Pair * const, const int, const int, const int, const double, + const double, const double * const, const double * const, + const double * const, const double * const, ThrData * const); + void ev_tally4_thr(Pair * const, const int, const int, const int, const int, + const double, const double * const, const double * const, + const double * const, const double * const, const double * const, + const double * const, ThrData * const); + + // Bond + void ev_tally_thr(Bond * const, const int, const int, const int, const int, + const double, const double, const double, const double, + const double, ThrData * const); + + // Angle + void ev_tally_thr(Angle * const, const int, const int, const int, const int, const int, + const double, const double * const, const double * const, + const double, const double, const double, const 
double, const double, + const double, ThrData * const thr); + void ev_tally13_thr(Angle * const, const int, const int, const int, const int, + const double, const double, const double, const double, + const double, ThrData * const thr); + + // Dihedral + void ev_tally_thr(Dihedral * const, const int, const int, const int, const int, const int, + const int, const double, const double * const, const double * const, + const double * const, const double, const double, const double, + const double, const double, const double, const double, const double, + const double, ThrData * const); + + // Improper + void ev_tally_thr(Improper * const, const int, const int, const int, const int, const int, + const int, const double, const double * const, const double * const, + const double * const, const double, const double, const double, + const double, const double, const double, const double, const double, + const double, ThrData * const); - protected: - // set loop range, thread id, and force array offset for threaded runs. - double **loop_setup_thr(double **, int &, int &, int &, int, int, int); - - // reduce per thread data into the first part of the array - void data_reduce_thr(double *, int, int, int, int); + // style independent versions + void v_tally2_thr(const int, const int, const double, const double * const, ThrData * const); + void v_tally3_thr(const int, const int, const int, const double * const, const double * const, + const double * const, const double * const, ThrData * const); + void v_tally4_thr(const int, const int, const int, const int, const double * const, + const double * const, const double * const, const double * const, + const double * const, const double * const, ThrData * const); + void ev_tally_list_thr(Pair * const, const int, const int * const, + const double , const double * const , ThrData * const); }; +// set loop range thread id, and force array offset for threaded runs. 
+//
+// loop_setup_thr(): hand the calling thread its id and a half-open index
+// range [ifrom,ito) out of inum loop iterations.
+//
+//   ifrom, ito  (out) first index and one-past-last index for this thread
+//   tid         (out) omp_get_thread_num() with OpenMP, 0 otherwise
+//   inum        (in)  total number of loop iterations to distribute
+//   nthreads    (in)  thread count the split is based on
+//                     (presumably the size of the OpenMP team)
+//   nproxy      (in)  number of leading thread ids (tid < nproxy) that are
+//                     left out of the split; they receive an empty range
+//
+// Unlike the former "double **loop_setup_thr(double **, ...)" member that
+// this function replaces, no force array offset is computed here.
+// With OpenMP and inum >= 0 the result satisfies 0 <= ifrom <= ito <= inum.
+// Without OpenMP the single thread gets the full range [0,inum) and
+// nthreads/nproxy are ignored.
+static inline void loop_setup_thr(int &ifrom, int &ito, int &tid,
+                                  int inum, int nthreads, int nproxy=0)
+{
+#if defined(_OPENMP)
+  tid = omp_get_thread_num();
+
+  // number of threads that actually share the inum iterations.
+  const int nwork = nthreads - nproxy;
+
+  // proxy threads and a team without any worker thread get an empty range.
+  // this avoids a division by zero for nthreads == nproxy and a range of
+  // negative indices for tid < nproxy.
+  if ((nwork < 1) || (tid < nproxy)) {
+    ifrom = ito = 0;
+    return;
+  }
+
+  // each thread works on a fixed chunk of atoms.
+  const int idelta = 1 + inum/nwork;
+  ifrom = (tid-nproxy)*idelta;
+
+  // trailing threads may have nothing left to do: clamp the start of the
+  // range so that ifrom never ends up beyond ito.
+  if (ifrom > inum) ifrom = inum;
+  ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
+#else
+  tid = 0;
+  ifrom = 0;
+  ito = inum;
+#endif
+}
+
 }
 
 #endif
-- 
GitLab