From ce59e32d1cf621ff59da11f7eaaa34aef10f36ae Mon Sep 17 00:00:00 2001 From: sjplimp <sjplimp@f3b2605a-c512-4ea7-a41b-209d697bcdaa> Date: Fri, 28 Aug 2015 20:00:56 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@13937 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/GPU/fix_gpu.cpp | 2 +- src/KOKKOS/verlet_kokkos.cpp | 51 +- src/KSPACE/fix_tune_kspace.cpp | 4 +- src/REPLICA/neb.cpp | 8 +- src/REPLICA/prd.cpp | 84 ++-- src/REPLICA/tad.cpp | 92 ++-- src/REPLICA/temper.cpp | 4 +- src/REPLICA/verlet_split.cpp | 26 +- src/USER-CUDA/verlet_cuda.cpp | 36 +- src/USER-FEP/compute_fep.cpp | 8 +- src/USER-INTEL/verlet_intel.cpp | 41 +- src/USER-INTEL/verlet_split_intel.cpp | 30 +- src/USER-OMP/angle_charmm_omp.cpp | 2 + src/USER-OMP/angle_class2_omp.cpp | 2 + src/USER-OMP/angle_cosine_delta_omp.cpp | 2 + src/USER-OMP/angle_cosine_omp.cpp | 2 + src/USER-OMP/angle_cosine_periodic_omp.cpp | 2 + src/USER-OMP/angle_cosine_shift_exp_omp.cpp | 2 + src/USER-OMP/angle_cosine_shift_omp.cpp | 2 + src/USER-OMP/angle_cosine_squared_omp.cpp | 2 + src/USER-OMP/angle_dipole_omp.cpp | 2 + src/USER-OMP/angle_fourier_omp.cpp | 2 + src/USER-OMP/angle_fourier_simple_omp.cpp | 2 + src/USER-OMP/angle_harmonic_omp.cpp | 2 + src/USER-OMP/angle_quartic_omp.cpp | 2 + src/USER-OMP/angle_sdk_omp.cpp | 2 + src/USER-OMP/angle_table_omp.cpp | 2 + src/USER-OMP/bond_class2_omp.cpp | 2 + src/USER-OMP/bond_fene_expand_omp.cpp | 2 + src/USER-OMP/bond_fene_omp.cpp | 2 + src/USER-OMP/bond_harmonic_omp.cpp | 2 + src/USER-OMP/bond_harmonic_shift_cut_omp.cpp | 2 + src/USER-OMP/bond_harmonic_shift_omp.cpp | 2 + src/USER-OMP/bond_morse_omp.cpp | 2 + src/USER-OMP/bond_nonlinear_omp.cpp | 2 + src/USER-OMP/bond_quartic_omp.cpp | 2 + src/USER-OMP/bond_table_omp.cpp | 2 + src/USER-OMP/dihedral_charmm_omp.cpp | 2 + src/USER-OMP/dihedral_class2_omp.cpp | 2 + .../dihedral_cosine_shift_exp_omp.cpp | 2 + src/USER-OMP/dihedral_fourier_omp.cpp | 2 + src/USER-OMP/dihedral_harmonic_omp.cpp | 2 + src/USER-OMP/dihedral_helix_omp.cpp | 2 + src/USER-OMP/dihedral_multi_harmonic_omp.cpp | 2 + src/USER-OMP/dihedral_nharmonic_omp.cpp | 2 + src/USER-OMP/dihedral_opls_omp.cpp | 2 + src/USER-OMP/dihedral_quadratic_omp.cpp | 2 + src/USER-OMP/dihedral_table_omp.cpp | 2 + src/USER-OMP/ewald_omp.cpp | 2 + src/USER-OMP/fix_omp.cpp | 31 +- src/USER-OMP/fix_omp.h | 2 + src/USER-OMP/improper_class2_omp.cpp | 2 + src/USER-OMP/improper_cossq_omp.cpp | 2 + src/USER-OMP/improper_cvff_omp.cpp | 2 + src/USER-OMP/improper_fourier_omp.cpp | 14 +- src/USER-OMP/improper_harmonic_omp.cpp | 2 + src/USER-OMP/improper_ring_omp.cpp | 2 + src/USER-OMP/improper_umbrella_omp.cpp | 2 + src/USER-OMP/msm_cg_omp.cpp | 1 + src/USER-OMP/msm_omp.cpp | 8 +- src/USER-OMP/neighbor_omp.h | 22 +- src/USER-OMP/pair_adp_omp.cpp | 4 + src/USER-OMP/pair_airebo_omp.cpp | 2 + src/USER-OMP/pair_beck_omp.cpp | 2 + src/USER-OMP/pair_born_coul_long_omp.cpp | 2 + src/USER-OMP/pair_born_coul_msm_omp.cpp | 2 + src/USER-OMP/pair_born_coul_wolf_omp.cpp | 2 + src/USER-OMP/pair_born_omp.cpp | 2 + src/USER-OMP/pair_brownian_omp.cpp | 2 + src/USER-OMP/pair_brownian_poly_omp.cpp | 2 + src/USER-OMP/pair_buck_coul_cut_omp.cpp | 2 + src/USER-OMP/pair_buck_coul_long_omp.cpp | 2 + src/USER-OMP/pair_buck_coul_msm_omp.cpp | 2 + src/USER-OMP/pair_buck_long_coul_long_omp.cpp | 12 +- src/USER-OMP/pair_buck_omp.cpp | 2 + src/USER-OMP/pair_cdeam_omp.cpp | 24 +- src/USER-OMP/pair_colloid_omp.cpp | 2 + src/USER-OMP/pair_comb_omp.cpp | 2 + src/USER-OMP/pair_coul_cut_omp.cpp | 2 + src/USER-OMP/pair_coul_cut_soft_omp.cpp | 2 + src/USER-OMP/pair_coul_debye_omp.cpp | 2 + src/USER-OMP/pair_coul_diel_omp.cpp | 2 + src/USER-OMP/pair_coul_dsf_omp.cpp | 2 + src/USER-OMP/pair_coul_long_omp.cpp | 2 + src/USER-OMP/pair_coul_long_soft_omp.cpp | 2 + src/USER-OMP/pair_coul_msm_omp.cpp | 2 + src/USER-OMP/pair_coul_wolf_omp.cpp | 2 + src/USER-OMP/pair_dpd_omp.cpp | 2 + src/USER-OMP/pair_dpd_tstat_omp.cpp | 2 + src/USER-OMP/pair_eam_omp.cpp | 4 + src/USER-OMP/pair_edip_omp.cpp | 2 + src/USER-OMP/pair_eim_omp.cpp | 6 + src/USER-OMP/pair_gauss_cut_omp.cpp | 2 + src/USER-OMP/pair_gauss_omp.cpp | 2 + src/USER-OMP/pair_gayberne_omp.cpp | 2 + src/USER-OMP/pair_gran_hertz_history_omp.cpp | 2 + src/USER-OMP/pair_gran_hooke_history_omp.cpp | 4 +- src/USER-OMP/pair_gran_hooke_omp.cpp | 4 +- src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp | 2 + .../pair_hbond_dreiding_morse_omp.cpp | 2 + src/USER-OMP/pair_line_lj_omp.cpp | 2 + src/USER-OMP/pair_lj96_cut_omp.cpp | 2 + ...air_lj_charmm_coul_charmm_implicit_omp.cpp | 2 + .../pair_lj_charmm_coul_charmm_omp.cpp | 2 + src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp | 2 + .../pair_lj_charmm_coul_long_soft_omp.cpp | 3 +- src/USER-OMP/pair_lj_charmm_coul_msm_omp.cpp | 8 +- src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp | 2 + src/USER-OMP/pair_lj_class2_coul_long_omp.cpp | 2 + src/USER-OMP/pair_lj_class2_omp.cpp | 2 + src/USER-OMP/pair_lj_cubic_omp.cpp | 2 + src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp | 2 + .../pair_lj_cut_coul_cut_soft_omp.cpp | 2 + src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp | 2 + src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp | 3 +- src/USER-OMP/pair_lj_cut_coul_long_omp.cpp | 2 + .../pair_lj_cut_coul_long_soft_omp.cpp | 2 + src/USER-OMP/pair_lj_cut_coul_msm_omp.cpp | 2 + src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp | 2 + src/USER-OMP/pair_lj_cut_omp.cpp | 3 +- src/USER-OMP/pair_lj_cut_soft_omp.cpp | 2 + src/USER-OMP/pair_lj_cut_tip4p_cut_omp.cpp | 4 +- src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp | 4 +- .../pair_lj_cut_tip4p_long_soft_omp.cpp | 2 + src/USER-OMP/pair_lj_expand_omp.cpp | 2 + .../pair_lj_gromacs_coul_gromacs_omp.cpp | 2 + src/USER-OMP/pair_lj_gromacs_omp.cpp | 2 + src/USER-OMP/pair_lj_long_coul_long_omp.cpp | 42 +- src/USER-OMP/pair_lj_long_coul_long_omp.h | 2 +- src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp | 12 +- src/USER-OMP/pair_lj_long_tip4p_long_omp.h | 2 +- src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp | 2 + src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp | 2 + src/USER-OMP/pair_lj_sdk_omp.cpp | 2 + src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp | 2 + src/USER-OMP/pair_lj_sf_omp.cpp | 2 + src/USER-OMP/pair_lj_smooth_linear_omp.cpp | 2 + src/USER-OMP/pair_lj_smooth_omp.cpp | 2 + src/USER-OMP/pair_lubricate_omp.cpp | 2 + src/USER-OMP/pair_lubricate_poly_omp.cpp | 2 + src/USER-OMP/pair_meam_spline_omp.cpp | 3 + src/USER-OMP/pair_morse_omp.cpp | 2 + src/USER-OMP/pair_nb3b_harmonic_omp.cpp | 2 + src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp | 2 + src/USER-OMP/pair_nm_cut_coul_long_omp.cpp | 2 + src/USER-OMP/pair_nm_cut_omp.cpp | 2 + src/USER-OMP/pair_peri_lps_omp.cpp | 2 + src/USER-OMP/pair_peri_pmb_omp.cpp | 2 + src/USER-OMP/pair_resquared_omp.cpp | 2 + src/USER-OMP/pair_soft_omp.cpp | 2 + src/USER-OMP/pair_sw_omp.cpp | 2 + src/USER-OMP/pair_table_omp.cpp | 2 + src/USER-OMP/pair_tersoff_mod_omp.cpp | 2 + src/USER-OMP/pair_tersoff_omp.cpp | 2 + src/USER-OMP/pair_tersoff_table_omp.cpp | 2 + src/USER-OMP/pair_tip4p_cut_omp.cpp | 4 +- src/USER-OMP/pair_tip4p_long_omp.cpp | 4 +- src/USER-OMP/pair_tip4p_long_soft_omp.cpp | 2 + src/USER-OMP/pair_tri_lj_omp.cpp | 2 + src/USER-OMP/pair_yukawa_colloid_omp.cpp | 2 + src/USER-OMP/pair_yukawa_omp.cpp | 2 + src/USER-OMP/pair_zbl_omp.cpp | 2 + src/USER-OMP/pppm_cg_omp.cpp | 19 +- src/USER-OMP/pppm_disp_omp.cpp | 91 ++-- src/USER-OMP/pppm_disp_omp.h | 2 +- src/USER-OMP/pppm_omp.cpp | 19 +- src/USER-OMP/pppm_tip4p_omp.cpp | 17 +- src/USER-OMP/respa_omp.cpp | 135 ++++-- src/USER-OMP/thr_data.cpp | 56 ++- src/USER-OMP/thr_data.h | 15 +- src/USER-OMP/thr_omp.cpp | 4 +- src/finish.cpp | 445 ++++++++++++------ src/input.cpp | 10 + src/input.h | 5 +- src/min.cpp | 32 +- src/min_cg.cpp | 2 +- src/min_fire.cpp | 2 +- src/min_hftn.cpp | 2 +- src/min_quickmin.cpp | 2 +- src/min_sd.cpp | 2 +- src/minimize.cpp | 4 +- src/neighbor.cpp | 6 +- src/rerun.cpp | 4 +- src/respa.cpp | 62 ++- src/run.cpp | 8 +- src/thermo.cpp | 14 +- src/timer.cpp | 178 ++++++- src/timer.h | 54 ++- src/verlet.cpp | 37 +- 189 files changed, 1515 insertions(+), 576 deletions(-) diff --git a/src/GPU/fix_gpu.cpp b/src/GPU/fix_gpu.cpp index 6135a1f754..1dea4dc467 100644 --- a/src/GPU/fix_gpu.cpp +++ b/src/GPU/fix_gpu.cpp @@ -295,7 +295,7 @@ void FixGPU::post_force(int vflag) force->pair->virial[5] += lvirial[5]; if (force->pair->vflag_fdotr) force->pair->virial_fdotr_compute(); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index 1e7475dc49..b63ca98ba4 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -107,7 +107,7 @@ void VerletKokkos::setup() atomKK->sync(force->pair->execution_space,force->pair->datamask_read); atomKK->modified(force->pair->execution_space,force->pair->datamask_modify); force->pair->compute(eflag,vflag); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } else if (force->pair) force->pair->compute_dummy(eflag,vflag); @@ -133,7 +133,7 @@ void VerletKokkos::setup() atomKK->modified(force->improper->execution_space,force->improper->datamask_modify); force->improper->compute(eflag,vflag); } - timer->stamp(TIME_BOND); + timer->stamp(Timer::BOND); } if(force->kspace) { @@ -142,7 +142,7 @@ void VerletKokkos::setup() atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read); atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify); force->kspace->compute(eflag,vflag); - timer->stamp(TIME_KSPACE); + timer->stamp(Timer::KSPACE); } else force->kspace->compute_dummy(eflag,vflag); } @@ -209,7 +209,7 @@ void VerletKokkos::setup_minimal(int flag) atomKK->sync(force->pair->execution_space,force->pair->datamask_read); atomKK->modified(force->pair->execution_space,force->pair->datamask_modify); force->pair->compute(eflag,vflag); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } else if (force->pair) force->pair->compute_dummy(eflag,vflag); @@ -235,7 +235,7 @@ void VerletKokkos::setup_minimal(int flag) atomKK->modified(force->improper->execution_space,force->improper->datamask_modify); force->improper->compute(eflag,vflag); } - timer->stamp(TIME_BOND); + timer->stamp(Timer::BOND); } if(force->kspace) { @@ -244,7 +244,7 @@ void VerletKokkos::setup_minimal(int flag) atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read); atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify); force->kspace->compute(eflag,vflag); - timer->stamp(TIME_KSPACE); + timer->stamp(Timer::KSPACE); } else force->kspace->compute_dummy(eflag,vflag); } @@ -286,9 +286,11 @@ void VerletKokkos::run(int n) // initial time integration ktimer.reset(); + timer->stamp(); modify->initial_integrate(vflag); time += ktimer.seconds(); if (n_post_integrate) modify->post_integrate(); + timer->stamp(Timer::MODIFY); // regular communication vs neighbor list rebuild @@ -297,13 +299,17 @@ void VerletKokkos::run(int n) if (nflag == 0) { timer->stamp(); comm->forward_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } else { // added debug //atomKK->sync(Host,ALL_MASK); //atomKK->modified(Host,ALL_MASK); - if (n_pre_exchange) modify->pre_exchange(); + if (n_pre_exchange) { + timer->stamp(); + modify->pre_exchange(); + timer->stamp(Timer::MODIFY); + } // debug //atomKK->sync(Host,ALL_MASK); //atomKK->modified(Host,ALL_MASK); @@ -330,10 +336,13 @@ void VerletKokkos::run(int n) if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost); - timer->stamp(TIME_COMM); - if (n_pre_neighbor) modify->pre_neighbor(); + timer->stamp(Timer::COMM); + if (n_pre_neighbor) { + modify->pre_neighbor(); + timer->stamp(Timer::MODIFY); + } neighbor->build(); - timer->stamp(TIME_NEIGHBOR); + timer->stamp(Timer::NEIGH); } // force computations @@ -342,19 +351,24 @@ void VerletKokkos::run(int n) // and Pair:ev_tally() needs to be called before any tallying force_clear(); + + timer->stamp(); + // added for debug //atomKK->k_x.sync<LMPHostType>(); //atomKK->k_f.sync<LMPHostType>(); //atomKK->k_f.modify<LMPHostType>(); - if (n_pre_force) modify->pre_force(vflag); + if (n_pre_force) { + modify->pre_force(vflag); + timer->stamp(Timer::MODIFY); + } - timer->stamp(); if (pair_compute_flag) { atomKK->sync(force->pair->execution_space,force->pair->datamask_read); atomKK->modified(force->pair->execution_space,force->pair->datamask_modify); force->pair->compute(eflag,vflag); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } if (atomKK->molecular) { @@ -378,20 +392,20 @@ void VerletKokkos::run(int n) atomKK->modified(force->improper->execution_space,force->improper->datamask_modify); force->improper->compute(eflag,vflag); } - timer->stamp(TIME_BOND); + timer->stamp(Timer::BOND); } if (kspace_compute_flag) { atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read); atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify); force->kspace->compute(eflag,vflag); - timer->stamp(TIME_KSPACE); + timer->stamp(Timer::KSPACE); } // reverse communication of forces if (force->newton) comm->reverse_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); // force modifications, final time integration, diagnostics @@ -400,6 +414,7 @@ void VerletKokkos::run(int n) if (n_post_force) modify->post_force(vflag); modify->final_integrate(); if (n_end_of_step) modify->end_of_step(); + timer->stamp(Timer::MODIFY); time += ktimer.seconds(); @@ -410,7 +425,7 @@ void VerletKokkos::run(int n) timer->stamp(); output->write(ntimestep); - timer->stamp(TIME_OUTPUT); + timer->stamp(Timer::OUTPUT); } } } diff --git a/src/KSPACE/fix_tune_kspace.cpp b/src/KSPACE/fix_tune_kspace.cpp index e3d5a5b5cd..bed94d0f04 100644 --- a/src/KSPACE/fix_tune_kspace.cpp +++ b/src/KSPACE/fix_tune_kspace.cpp @@ -159,7 +159,7 @@ void FixTuneKspace::pre_exchange() adjust_rcut(time); } - last_spcpu = timer->elapsed(TIME_LOOP); + last_spcpu = timer->elapsed(Timer::TOTAL); } /* ---------------------------------------------------------------------- @@ -177,7 +177,7 @@ double FixTuneKspace::get_timing_info() dvalue = 0.0; firststep = 1; } else { - new_cpu = timer->elapsed(TIME_LOOP); + new_cpu = timer->elapsed(Timer::TOTAL); double cpu_diff = new_cpu - last_spcpu; int step_diff = new_step - last_step; if (step_diff > 0.0) dvalue = cpu_diff/step_diff; diff --git a/src/REPLICA/neb.cpp b/src/REPLICA/neb.cpp index 4c34013bfa..8cb0be7462 100644 --- a/src/REPLICA/neb.cpp +++ b/src/REPLICA/neb.cpp @@ -232,7 +232,7 @@ void NEB::run() // damped dynamic min styles insure all replicas converge together timer->init(); - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); while (update->minimize->niter < n1steps) { update->minimize->run(nevery); @@ -240,7 +240,7 @@ void NEB::run() if (update->minimize->stop_condition) break; } - timer->barrier_stop(TIME_LOOP); + timer->barrier_stop(); update->minimize->cleanup(); @@ -302,7 +302,7 @@ void NEB::run() // damped dynamic min styles insure all replicas converge together timer->init(); - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); while (update->minimize->niter < n2steps) { update->minimize->run(nevery); @@ -310,7 +310,7 @@ void NEB::run() if (update->minimize->stop_condition) break; } - timer->barrier_stop(TIME_LOOP); + timer->barrier_stop(); update->minimize->cleanup(); diff --git a/src/REPLICA/prd.cpp b/src/REPLICA/prd.cpp index a99c201480..de3447db98 100644 --- a/src/REPLICA/prd.cpp +++ b/src/REPLICA/prd.cpp @@ -278,8 +278,8 @@ void PRD::command(int narg, char **arg) share_event(0,0,0); timer->init(); - timer->barrier_start(TIME_LOOP); - time_start = timer->array[TIME_LOOP]; + timer->barrier_start(); + time_start = timer->get_wall(Timer::TOTAL); log_event(); @@ -305,8 +305,8 @@ void PRD::command(int narg, char **arg) time_dephase = time_dynamics = time_quench = time_comm = time_output = 0.0; bigint clock = 0; - timer->barrier_start(TIME_LOOP); - time_start = timer->array[TIME_LOOP]; + timer->barrier_start(); + time_start = timer->get_wall(Timer::TOTAL); int istep = 0; @@ -386,7 +386,7 @@ void PRD::command(int narg, char **arg) lmp->init(); update->integrate->setup(); - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); if (t_corr > 0) replicate(ireplica); if (temp_flag == 0) { @@ -396,16 +396,16 @@ void PRD::command(int narg, char **arg) universe->uworld); } - timer->barrier_stop(TIME_LOOP); - time_comm += timer->array[TIME_LOOP]; + timer->barrier_stop(); + time_comm += timer->get_wall(Timer::TOTAL); // write restart file of hot coords if (restart_flag) { - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); output->write_restart(update->ntimestep); - timer->barrier_stop(TIME_LOOP); - time_output += timer->array[TIME_LOOP]; + timer->barrier_stop(); + time_output += timer->get_wall(Timer::TOTAL); } if (stepmode == 0) istep = update->ntimestep - update->beginstep; @@ -416,14 +416,14 @@ void PRD::command(int narg, char **arg) // set total timers and counters so Finish() will process them - timer->array[TIME_LOOP] = time_start; - timer->barrier_stop(TIME_LOOP); + timer->set_wall(Timer::TOTAL, time_start); + timer->barrier_stop(); - timer->array[TIME_PAIR] = time_dephase; - timer->array[TIME_BOND] = time_dynamics; - timer->array[TIME_KSPACE] = time_quench; - timer->array[TIME_COMM] = time_comm; - timer->array[TIME_OUTPUT] = time_output; + timer->set_wall(Timer::DEPHASE, time_dephase); + timer->set_wall(Timer::DYNAMICS, time_dynamics); + timer->set_wall(Timer::QUENCH, time_quench); + timer->set_wall(Timer::REPCOMM, time_comm); + timer->set_wall(Timer::REPOUT, time_output); neighbor->ncalls = nbuild; neighbor->ndanger = ndanger; @@ -431,14 +431,14 @@ void PRD::command(int narg, char **arg) if (me_universe == 0) { if (universe->uscreen) fprintf(universe->uscreen, - "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT + "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT " atoms\n", - timer->array[TIME_LOOP],nprocs_universe,nsteps,atom->natoms); + timer->get_wall(Timer::TOTAL),nprocs_universe,nsteps,atom->natoms); if (universe->ulogfile) fprintf(universe->ulogfile, - "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT + "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT " atoms\n", - timer->array[TIME_LOOP],nprocs_universe,nsteps,atom->natoms); + timer->get_wall(Timer::TOTAL),nprocs_universe,nsteps,atom->natoms); } if (me == 0) { @@ -541,10 +541,10 @@ void PRD::dynamics(int nsteps, double &time_category) //modify->addstep_compute_all(update->ntimestep); bigint ncalls = neighbor->ncalls; - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); update->integrate->run(nsteps); - timer->barrier_stop(TIME_LOOP); - time_category += timer->array[TIME_LOOP]; + timer->barrier_stop(); + time_category += timer->get_wall(Timer::TOTAL); nbuild += neighbor->ncalls - ncalls; ndanger += neighbor->ndanger; @@ -583,10 +583,10 @@ void PRD::quench() int ncalls = neighbor->ncalls; - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); update->minimize->run(maxiter); - timer->barrier_stop(TIME_LOOP); - time_quench += timer->array[TIME_LOOP]; + timer->barrier_stop(); + time_quench += timer->get_wall(Timer::TOTAL); if (neighbor->ncalls == ncalls) quench_reneighbor = 0; else quench_reneighbor = 1; @@ -619,7 +619,7 @@ int PRD::check_event(int replica_num) if (compute_event->compute_scalar() > 0.0) worldflag = 1; if (replica_num >= 0 && replica_num != universe->iworld) worldflag = 0; - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); if (me == 0) MPI_Allreduce(&worldflag,&universeflag,1, MPI_INT,MPI_SUM,comm_replica); @@ -655,8 +655,8 @@ int PRD::check_event(int replica_num) MPI_Bcast(&ireplica,1,MPI_INT,0,world); } - timer->barrier_stop(TIME_LOOP); - time_comm += timer->array[TIME_LOOP]; + timer->barrier_stop(); + time_comm += timer->get_wall(Timer::TOTAL); return ireplica; } @@ -672,14 +672,14 @@ int PRD::check_event(int replica_num) void PRD::share_event(int ireplica, int flag, int decrement) { - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); // communicate quenched coords to all replicas and store as event // decrement event counter if flag = 0 since not really an event replicate(ireplica); - timer->barrier_stop(TIME_LOOP); - time_comm += timer->array[TIME_LOOP]; + timer->barrier_stop(); + time_comm += timer->get_wall(Timer::TOTAL); // adjust time for last correlated event check (not on first event) @@ -717,21 +717,21 @@ void PRD::share_event(int ireplica, int flag, int decrement) // addstep_compute_all insures eng/virial are calculated if needed if (output->ndump && universe->iworld == 0) { - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); modify->addstep_compute_all(update->ntimestep); update->integrate->setup_minimal(1); output->write_dump(update->ntimestep); - timer->barrier_stop(TIME_LOOP); - time_output += timer->array[TIME_LOOP]; + timer->barrier_stop(); + time_output += timer->get_wall(Timer::TOTAL); } // restore and communicate hot coords to all replicas fix_event->restore_state_quench(); - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); replicate(ireplica); - timer->barrier_stop(TIME_LOOP); - time_comm += timer->array[TIME_LOOP]; + timer->barrier_stop(); + time_comm += timer->get_wall(Timer::TOTAL); } /* ---------------------------------------------------------------------- @@ -740,13 +740,13 @@ void PRD::share_event(int ireplica, int flag, int decrement) void PRD::log_event() { - timer->array[TIME_LOOP] = time_start; + timer->set_wall(Timer::TOTAL, time_start); if (universe->me == 0) { if (universe->uscreen) fprintf(universe->uscreen, BIGINT_FORMAT " %.3f " BIGINT_FORMAT " %d %d %d %d\n", fix_event->event_timestep, - timer->elapsed(TIME_LOOP), + timer->elapsed(Timer::TOTAL), fix_event->clock, fix_event->event_number,fix_event->correlated_event, fix_event->ncoincident, @@ -755,7 +755,7 @@ void PRD::log_event() fprintf(universe->ulogfile, BIGINT_FORMAT " %.3f " BIGINT_FORMAT " %d %d %d %d\n", fix_event->event_timestep, - timer->elapsed(TIME_LOOP), + timer->elapsed(Timer::TOTAL), fix_event->clock, fix_event->event_number,fix_event->correlated_event, fix_event->ncoincident, diff --git a/src/REPLICA/tad.cpp b/src/REPLICA/tad.cpp index 7c858c754f..80381a7a3b 100644 --- a/src/REPLICA/tad.cpp +++ b/src/REPLICA/tad.cpp @@ -255,8 +255,8 @@ void TAD::command(int narg, char **arg) quench(); timer->init(); - timer->barrier_start(TIME_LOOP); - time_start = timer->array[TIME_LOOP]; + timer->barrier_start(); + time_start = timer->get_wall(Timer::TOTAL); fix_event->store_event_tad(update->ntimestep); log_event(0); fix_event->restore_state_quench(); @@ -275,8 +275,8 @@ void TAD::command(int narg, char **arg) nbuild = ndanger = 0; time_neb = time_dynamics = time_quench = time_comm = time_output = 0.0; - timer->barrier_start(TIME_LOOP); - time_start = timer->array[TIME_LOOP]; + timer->barrier_start(); + time_start = timer->get_wall(Timer::TOTAL); int confident_flag, event_flag; @@ -347,10 +347,10 @@ void TAD::command(int narg, char **arg) // write restart file of hot coords if (restart_flag) { - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); output->write_restart(update->ntimestep); - timer->barrier_stop(TIME_LOOP); - time_output += timer->array[TIME_LOOP]; + timer->barrier_stop(); + time_output += timer->get_wall(Timer::TOTAL); } } @@ -381,14 +381,14 @@ void TAD::command(int narg, char **arg) // set total timers and counters so Finish() will process them - timer->array[TIME_LOOP] = time_start; - timer->barrier_stop(TIME_LOOP); + timer->set_wall(Timer::TOTAL, time_start); + timer->barrier_stop(); - timer->array[TIME_PAIR] = time_neb; - timer->array[TIME_BOND] = time_dynamics; - timer->array[TIME_KSPACE] = time_quench; - timer->array[TIME_COMM] = time_comm; - timer->array[TIME_OUTPUT] = time_output; + timer->set_wall(Timer::NEB, time_neb); + timer->set_wall(Timer::DYNAMICS, time_dynamics); + timer->set_wall(Timer::QUENCH, time_quench); + timer->set_wall(Timer::REPCOMM, time_comm); + timer->set_wall(Timer::REPOUT, time_output); neighbor->ncalls = nbuild; neighbor->ndanger = ndanger; @@ -396,14 +396,14 @@ void TAD::command(int narg, char **arg) if (me_universe == 0) { if (universe->uscreen) fprintf(universe->uscreen, - "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT + "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT " atoms\n", - timer->array[TIME_LOOP],nprocs_universe,nsteps,atom->natoms); - if (universe->ulogfile) + timer->get_wall(Timer::TOTAL),nprocs_universe,nsteps,atom->natoms); + if (universe->ulogfile) fprintf(universe->ulogfile, - "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT + "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT " atoms\n", - timer->array[TIME_LOOP],nprocs_universe,nsteps,atom->natoms); + timer->get_wall(Timer::TOTAL),nprocs_universe,nsteps,atom->natoms); } if (me_universe == 0) fclose(ulogfile_neb); @@ -451,10 +451,10 @@ void TAD::dynamics() //modify->addstep_compute_all(update->ntimestep); int ncalls = neighbor->ncalls; - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); update->integrate->run(t_event); - timer->barrier_stop(TIME_LOOP); - time_dynamics += timer->array[TIME_LOOP]; + timer->barrier_stop(); + time_dynamics += timer->get_wall(Timer::TOTAL); nbuild += neighbor->ncalls - ncalls; ndanger += neighbor->ndanger; @@ -493,10 +493,10 @@ void TAD::quench() int ncalls = neighbor->ncalls; - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); update->minimize->run(maxiter); - timer->barrier_stop(TIME_LOOP); - time_quench += timer->array[TIME_LOOP]; + timer->barrier_stop(); + time_quench += timer->get_wall(Timer::TOTAL); if (neighbor->ncalls == ncalls) quench_reneighbor = 0; else quench_reneighbor = 1; @@ -535,14 +535,14 @@ int TAD::check_event() void TAD::log_event(int ievent) { - timer->array[TIME_LOOP] = time_start; + timer->set_wall(Timer::TOTAL, time_start); if (universe->me == 0) { double tfrac = 0.0; if (universe->uscreen) fprintf(universe->uscreen, BIGINT_FORMAT " %.3f %d %d %s %.3f %.3f %.3f %.3f\n", fix_event->event_timestep, - timer->elapsed(TIME_LOOP), + timer->elapsed(Timer::TOTAL), fix_event->event_number,ievent, "E ", fix_event->ebarrier,tfrac, @@ -551,7 +551,7 @@ void TAD::log_event(int ievent) fprintf(universe->ulogfile, BIGINT_FORMAT " %.3f %d %d %s %.3f %.3f %.3f %.3f\n", fix_event->event_timestep, - timer->elapsed(TIME_LOOP), + timer->elapsed(Timer::TOTAL), fix_event->event_number,ievent, "E ", fix_event->ebarrier,tfrac, @@ -563,12 +563,12 @@ void TAD::log_event(int ievent) // addstep_compute_all insures eng/virial are calculated if needed if (output->ndump && universe->iworld == 0) { - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); modify->addstep_compute_all(update->ntimestep); update->integrate->setup_minimal(1); output->write_dump(update->ntimestep); - timer->barrier_stop(TIME_LOOP); - time_output += timer->array[TIME_LOOP]; + timer->barrier_stop(); + time_output += timer->get_wall(Timer::TOTAL); } } @@ -604,10 +604,10 @@ void TAD::options(int narg, char **arg) while (iarg < narg) { if (strcmp(arg[iarg],"min") == 0) { if (iarg+5 > narg) error->all(FLERR,"Illegal tad command"); - etol = atof(arg[iarg+1]); - ftol = atof(arg[iarg+2]); - maxiter = atoi(arg[iarg+3]); - maxeval = atoi(arg[iarg+4]); + etol = force->numeric(FLERR,arg[iarg+1]); + ftol = force->numeric(FLERR,arg[iarg+2]); + maxiter = force->inumeric(FLERR,arg[iarg+3]); + maxeval = force->inumeric(FLERR,arg[iarg+4]); if (maxiter < 0 || maxeval < 0 || etol < 0.0 || ftol < 0.0 ) error->all(FLERR,"Illegal tad command"); @@ -615,11 +615,11 @@ void TAD::options(int narg, char **arg) } else if (strcmp(arg[iarg],"neb") == 0) { if (iarg+6 > narg) error->all(FLERR,"Illegal tad command"); - etol_neb = atof(arg[iarg+1]); - ftol_neb = atof(arg[iarg+2]); - n1steps_neb = atoi(arg[iarg+3]); - n2steps_neb = atoi(arg[iarg+4]); - nevery_neb = atoi(arg[iarg+5]); + etol_neb = force->numeric(FLERR,arg[iarg+1]); + ftol_neb = force->numeric(FLERR,arg[iarg+2]); + n1steps_neb = force->inumeric(FLERR,arg[iarg+3]); + n2steps_neb = force->inumeric(FLERR,arg[iarg+4]); + nevery_neb = force->inumeric(FLERR,arg[iarg+5]); if (etol_neb < 0.0 || ftol_neb < 0.0 || n1steps_neb < 0 || n2steps_neb < 0 || nevery_neb < 0) error->all(FLERR,"Illegal tad command"); @@ -753,10 +753,10 @@ void TAD::perform_neb(int ievent) // had to bypass timer interface // because timer->array is reset inside neb->run() - // timer->barrier_start(TIME_LOOP); + // timer->barrier_start(); // neb->run(); - // timer->barrier_stop(TIME_LOOP); - // time_neb += timer->array[TIME_LOOP]; + // timer->barrier_stop(); + // time_neb += timer->get_wall(Timer::TOTAL); MPI_Barrier(world); double time_tmp = MPI_Wtime(); @@ -977,7 +977,7 @@ void TAD::compute_tlo(int ievent) // first-replica output about each event - timer->array[TIME_LOOP] = time_start; + timer->set_wall(Timer::TOTAL, time_start); if (universe->me == 0) { double tfrac = 0.0; if (ievent > 0) tfrac = delthi/deltstop; @@ -986,7 +986,7 @@ void TAD::compute_tlo(int ievent) fprintf(universe->uscreen, BIGINT_FORMAT " %.3f %d %d %s %.3f %.3f %.3f %.3f\n", fix_event_list[ievent]->event_timestep, - timer->elapsed(TIME_LOOP), + timer->elapsed(Timer::TOTAL), fix_event->event_number, ievent,statstr,ebarrier,tfrac, fix_event->tlo,deltlo); @@ -995,7 +995,7 @@ void TAD::compute_tlo(int ievent) fprintf(universe->ulogfile, BIGINT_FORMAT " %.3f %d %d %s %.3f %.3f %.3f %.3f\n", fix_event_list[ievent]->event_timestep, - timer->elapsed(TIME_LOOP), + timer->elapsed(Timer::TOTAL), fix_event->event_number, ievent,statstr,ebarrier,tfrac, fix_event->tlo,deltlo); diff --git a/src/REPLICA/temper.cpp b/src/REPLICA/temper.cpp index 8d7204345f..45e100ad52 100644 --- a/src/REPLICA/temper.cpp +++ b/src/REPLICA/temper.cpp @@ -204,7 +204,7 @@ void Temper::command(int narg, char **arg) } timer->init(); - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); for (int iswap = 0; iswap < nswaps; iswap++) { @@ -309,7 +309,7 @@ void Temper::command(int narg, char **arg) if (me_universe == 0) print_status(); } - timer->barrier_stop(TIME_LOOP); + timer->barrier_stop(); update->integrate->cleanup(); diff --git a/src/REPLICA/verlet_split.cpp b/src/REPLICA/verlet_split.cpp index c9da4270fb..408821fe22 100644 --- a/src/REPLICA/verlet_split.cpp +++ b/src/REPLICA/verlet_split.cpp @@ -279,7 +279,7 @@ void VerletSplit::run(int n) MPI_Barrier(universe->uworld); timer->init(); - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); // setup initial Rspace <-> Kspace comm params @@ -325,7 +325,7 @@ void VerletSplit::run(int n) if (nflag == 0) { timer->stamp(); comm->forward_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } else { if (n_pre_exchange) modify->pre_exchange(); if (triclinic) domain->x2lamda(atom->nlocal); @@ -340,10 +340,10 @@ void VerletSplit::run(int n) if (sortflag && ntimestep >= atom->nextsort) atom->sort(); comm->borders(); if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); if (n_pre_neighbor) modify->pre_neighbor(); neighbor->build(); - timer->stamp(TIME_NEIGHBOR); + timer->stamp(Timer::NEIGH); } } @@ -363,7 +363,7 @@ void VerletSplit::run(int n) timer->stamp(); if (force->pair) { force->pair->compute(eflag,vflag); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } if (atom->molecular) { @@ -371,12 +371,12 @@ void VerletSplit::run(int n) if (force->angle) force->angle->compute(eflag,vflag); if (force->dihedral) force->dihedral->compute(eflag,vflag); if (force->improper) force->improper->compute(eflag,vflag); - timer->stamp(TIME_BOND); + timer->stamp(Timer::BOND); } if (force->newton) { comm->reverse_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } } else { @@ -388,14 +388,14 @@ void VerletSplit::run(int n) if (force->kspace) { timer->stamp(); force->kspace->compute(eflag,vflag); - timer->stamp(TIME_KSPACE); + timer->stamp(Timer::KSPACE); } // TIP4P PPPM puts forces on ghost atoms, so must reverse_comm() if (tip4p_flag && force->newton) { comm->reverse_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } } @@ -407,14 +407,16 @@ void VerletSplit::run(int n) // all output if (master) { + timer->stamp(); if (n_post_force) modify->post_force(vflag); modify->final_integrate(); if (n_end_of_step) modify->end_of_step(); + timer->stamp(Timer::MODIFY); if (ntimestep == output->next) { timer->stamp(); output->write(ntimestep); - timer->stamp(TIME_OUTPUT); + timer->stamp(Timer::OUTPUT); } } } @@ -486,7 +488,7 @@ void VerletSplit::rk_setup() atom->map_clear(); comm->borders(); if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } } } @@ -535,7 +537,7 @@ void VerletSplit::r2k_comm() if (tip4p_flag && !master) { timer->stamp(); comm->forward_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } } diff --git a/src/USER-CUDA/verlet_cuda.cpp b/src/USER-CUDA/verlet_cuda.cpp index 7d4a0c4114..eba4a0b09c 100644 --- a/src/USER-CUDA/verlet_cuda.cpp +++ b/src/USER-CUDA/verlet_cuda.cpp @@ -679,7 +679,7 @@ void VerletCuda::run(int n) my_gettime(CLOCK_REALTIME, &starttime); timer->stamp(); comm->forward_comm(1); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); my_gettime(CLOCK_REALTIME, &endtime); cuda->shared_data.cuda_timings.comm_forward_total += endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000; @@ -699,7 +699,7 @@ void VerletCuda::run(int n) //start force calculation asynchronus cuda->shared_data.comm.comm_phase = 1; force->pair->compute(eflag, vflag); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); //CudaWrapper_Sync(); //download comm buffers from GPU, perform MPI communication and upload buffers again @@ -708,11 +708,11 @@ void VerletCuda::run(int n) my_gettime(CLOCK_REALTIME, &endtime); cuda->shared_data.cuda_timings.comm_forward_total += endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000; - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); //wait for force calculation CudaWrapper_Sync(); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); //unpack communication buffers my_gettime(CLOCK_REALTIME, &starttime); @@ -721,7 +721,7 @@ void VerletCuda::run(int n) cuda->shared_data.cuda_timings.comm_forward_total += endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000; - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); MYDBG(printf("# CUDA VerletCuda::iterate: communicate done\n");) cuda->shared_data.cuda_timings.test1 += endtotal.tv_sec - starttotal.tv_sec + 1.0 * (endtotal.tv_nsec - starttotal.tv_nsec) / 1000000000; @@ -732,7 +732,7 @@ void VerletCuda::run(int n) my_gettime(CLOCK_REALTIME, &endtime); cuda->shared_data.cuda_timings.comm_forward_total += endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000; - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); MYDBG(printf("# CUDA VerletCuda::iterate: communicate done\n");) } } else { @@ -822,7 +822,7 @@ void VerletCuda::run(int n) cuda->shared_data.buffer_new = 2; MYDBG(printf("# CUDA VerletCuda::iterate: neighbor build\n");) - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); my_gettime(CLOCK_REALTIME, &endtime); cuda->shared_data.cuda_timings.test2 += endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000; @@ -830,7 +830,7 @@ void VerletCuda::run(int n) //rebuild neighbor list test_atom(testatom, "Pre Neighbor"); neighbor->build(0); - timer->stamp(TIME_NEIGHBOR); + timer->stamp(Timer::NEIGH); MYDBG(printf("# CUDA VerletCuda::iterate: neighbor done\n");) //if bonded interactions are used (in this case collect_forces_later is true), transfer data which only changes upon exchange/border routines from GPU to CPU if(cuda->shared_data.pair.collect_forces_later) { @@ -917,7 +917,7 @@ void VerletCuda::run(int n) if(not cuda->shared_data.pair.collect_forces_later) CudaWrapper_Sync(); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } //calculate bonded interactions @@ -927,11 +927,11 @@ void VerletCuda::run(int n) if(n_pre_force == 0) Verlet::force_clear(); else cuda->cu_f->downloadAsync(2); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); if(neighbor->lastcall == update->ntimestep) { neighbor->build_topology(); - timer->stamp(TIME_NEIGHBOR); + timer->stamp(Timer::NEIGH); } test_atom(testatom, "pre bond force"); @@ -944,7 +944,7 @@ void VerletCuda::run(int n) if(force->improper) force->improper->compute(eflag, vflag); - timer->stamp(TIME_BOND); + timer->stamp(Timer::BOND); } //collect forces in case pair force and bonded interactions were overlapped, and either no KSPACE or a GPU KSPACE style is used @@ -969,7 +969,7 @@ void VerletCuda::run(int n) if(vflag) cuda->cu_virial->download(); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); my_gettime(CLOCK_REALTIME, &endtime); cuda->shared_data.cuda_timings.pair_force_collection += @@ -987,7 +987,7 @@ void VerletCuda::run(int n) if(n_pre_force == 0) Verlet::force_clear(); else cuda->cu_f->downloadAsync(2); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } force->kspace->compute(eflag, vflag); @@ -995,7 +995,7 @@ void VerletCuda::run(int n) if((not cuda->shared_data.pppm.cudable_force) && (not cuda->shared_data.pair.collect_forces_later)) cuda->uploadAll(); - timer->stamp(TIME_KSPACE); + timer->stamp(Timer::KSPACE); } //collect forces in case pair forces and kspace was overlaped @@ -1018,7 +1018,7 @@ void VerletCuda::run(int n) if(vflag) cuda->cu_virial->download(); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); my_gettime(CLOCK_REALTIME, &endtime); cuda->shared_data.cuda_timings.pair_force_collection += @@ -1028,7 +1028,7 @@ void VerletCuda::run(int n) //send forces on ghost atoms back to other GPU: THIS SHOULD NEVER HAPPEN if(force->newton) { comm->reverse_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } test_atom(testatom, "post force"); @@ -1054,7 +1054,7 @@ void VerletCuda::run(int n) timer->stamp(); output->write(ntimestep); - timer->stamp(TIME_OUTPUT); + timer->stamp(Timer::OUTPUT); } diff --git a/src/USER-FEP/compute_fep.cpp b/src/USER-FEP/compute_fep.cpp index 06103f874b..10551a15b9 100644 --- a/src/USER-FEP/compute_fep.cpp +++ b/src/USER-FEP/compute_fep.cpp @@ -305,11 +305,11 @@ void ComputeFEP::compute_vector() timer->stamp(); if (force->pair && force->pair->compute_flag) { force->pair->compute(eflag,vflag); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } if (chgflag && force->kspace && force->kspace->compute_flag) { force->kspace->compute(eflag,vflag); - timer->stamp(TIME_KSPACE); + timer->stamp(Timer::KSPACE); } // accumulate force/energy/virial from /gpu pair styles @@ -322,11 +322,11 @@ void ComputeFEP::compute_vector() timer->stamp(); if (force->pair && force->pair->compute_flag) { force->pair->compute(eflag,vflag); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } if (chgflag && force->kspace && force->kspace->compute_flag) { force->kspace->compute(eflag,vflag); - timer->stamp(TIME_KSPACE); + timer->stamp(Timer::KSPACE); } // accumulate force/energy/virial from /gpu pair styles diff --git a/src/USER-INTEL/verlet_intel.cpp b/src/USER-INTEL/verlet_intel.cpp index 5bfd04639c..039e3bc36e 100644 --- a/src/USER-INTEL/verlet_intel.cpp +++ b/src/USER-INTEL/verlet_intel.cpp @@ -272,8 +272,10 @@ void VerletIntel::run(int n) // initial time integration + timer->stamp(); modify->initial_integrate(vflag); if (n_post_integrate) modify->post_integrate(); + timer->stamp(Timer::MODIFY); // regular communication vs neighbor list rebuild @@ -282,9 +284,13 @@ void VerletIntel::run(int n) if (nflag == 0) { timer->stamp(); comm->forward_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } else { - if (n_pre_exchange) modify->pre_exchange(); + if (n_pre_exchange) { + timer->stamp(); + modify->pre_exchange(); + timer->stamp(Timer::MODIFY); + } if (triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); if (domain->box_change) { @@ -297,10 +303,13 @@ void VerletIntel::run(int n) if (sortflag && ntimestep >= atom->nextsort) atom->sort(); comm->borders(); if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); - timer->stamp(TIME_COMM); - if (n_pre_neighbor) modify->pre_neighbor(); + timer->stamp(Timer::COMM); + if (n_pre_neighbor) { + modify->pre_neighbor(); + timer->stamp(Timer::MODIFY); + } neighbor->build(); - timer->stamp(TIME_NEIGHBOR); + timer->stamp(Timer::NEIGH); } // force computations @@ -309,13 +318,18 @@ void VerletIntel::run(int n) // and Pair:ev_tally() needs to be called before any tallying force_clear(); - if (n_pre_force) modify->pre_force(vflag); timer->stamp(); + if (n_pre_force) { + modify->pre_force(vflag); + timer->stamp(Timer::MODIFY); + } + + if (pair_compute_flag) { force->pair->compute(eflag,vflag); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } if (atom->molecular) { @@ -323,18 +337,18 @@ void VerletIntel::run(int n) if (force->angle) force->angle->compute(eflag,vflag); if (force->dihedral) force->dihedral->compute(eflag,vflag); if (force->improper) force->improper->compute(eflag,vflag); - timer->stamp(TIME_BOND); + timer->stamp(Timer::BOND); } if (kspace_compute_flag) { force->kspace->compute(eflag,vflag); - timer->stamp(TIME_KSPACE); + timer->stamp(Timer::KSPACE); } #ifdef _LMP_INTEL_OFFLOAD if (sync_mode == 1) { fix_intel->sync_coprocessor(); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } #endif @@ -342,13 +356,13 @@ void VerletIntel::run(int n) if (force->newton) { comm->reverse_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } #ifdef _LMP_INTEL_OFFLOAD if (sync_mode == 2) { fix_intel->sync_coprocessor(); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } #endif @@ -357,13 +371,14 @@ void VerletIntel::run(int n) if (n_post_force) modify->post_force(vflag); modify->final_integrate(); if (n_end_of_step) modify->end_of_step(); + timer->stamp(Timer::MODIFY); // all output if (ntimestep == output->next) { timer->stamp(); output->write(ntimestep); - timer->stamp(TIME_OUTPUT); + timer->stamp(Timer::OUTPUT); } } } diff --git a/src/USER-INTEL/verlet_split_intel.cpp b/src/USER-INTEL/verlet_split_intel.cpp index f1b64331c4..806b3652f9 100644 --- a/src/USER-INTEL/verlet_split_intel.cpp +++ b/src/USER-INTEL/verlet_split_intel.cpp @@ -283,7 +283,7 @@ void VerletSplitIntel::run(int n) MPI_Barrier(universe->uworld); timer->init(); - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); // setup initial Rspace <-> Kspace comm params @@ -329,7 +329,7 @@ void VerletSplitIntel::run(int n) if (nflag == 0) { timer->stamp(); comm->forward_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } else { if (n_pre_exchange) modify->pre_exchange(); if (triclinic) domain->x2lamda(atom->nlocal); @@ -344,10 +344,10 @@ void VerletSplitIntel::run(int n) if (sortflag && ntimestep >= atom->nextsort) atom->sort(); comm->borders(); if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); if (n_pre_neighbor) modify->pre_neighbor(); neighbor->build(); - timer->stamp(TIME_NEIGHBOR); + timer->stamp(Timer::NEIGH); } } @@ -367,7 +367,7 @@ void VerletSplitIntel::run(int n) timer->stamp(); if (force->pair) { force->pair->compute(eflag,vflag); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } if (atom->molecular) { @@ -375,25 +375,25 @@ void VerletSplitIntel::run(int n) if (force->angle) force->angle->compute(eflag,vflag); if (force->dihedral) force->dihedral->compute(eflag,vflag); if (force->improper) force->improper->compute(eflag,vflag); - timer->stamp(TIME_BOND); + timer->stamp(Timer::BOND); } #ifdef _LMP_INTEL_OFFLOAD if (sync_mode == 1) { fix_intel->sync_coprocessor(); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } #endif if (force->newton) { comm->reverse_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } #ifdef _LMP_INTEL_OFFLOAD if (sync_mode == 2) { fix_intel->sync_coprocessor(); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } #endif @@ -406,14 +406,14 @@ void VerletSplitIntel::run(int n) if (force->kspace) { timer->stamp(); force->kspace->compute(eflag,vflag); - timer->stamp(TIME_KSPACE); + timer->stamp(Timer::KSPACE); } // TIP4P PPPM puts forces on ghost atoms, so must reverse_comm() if (tip4p_flag && force->newton) { comm->reverse_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } } @@ -425,14 +425,16 @@ void VerletSplitIntel::run(int n) // all output if (master) { + timer->stamp(); if (n_post_force) modify->post_force(vflag); modify->final_integrate(); if (n_end_of_step) modify->end_of_step(); + timer->stamp(Timer::MODIFY); if (ntimestep == output->next) { timer->stamp(); output->write(ntimestep); - timer->stamp(TIME_OUTPUT); + timer->stamp(Timer::OUTPUT); } } } @@ -504,7 +506,7 @@ void VerletSplitIntel::rk_setup() atom->map_clear(); comm->borders(); if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } } } @@ -553,7 +555,7 @@ void VerletSplitIntel::r2k_comm() if (tip4p_flag && !master) { timer->stamp(); comm->forward_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } } diff --git a/src/USER-OMP/angle_charmm_omp.cpp b/src/USER-OMP/angle_charmm_omp.cpp index e346736948..228ca94dee 100644 --- a/src/USER-OMP/angle_charmm_omp.cpp +++ b/src/USER-OMP/angle_charmm_omp.cpp @@ -61,6 +61,7 @@ void AngleCharmmOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -77,6 +78,7 @@ void AngleCharmmOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/angle_class2_omp.cpp b/src/USER-OMP/angle_class2_omp.cpp index bbe58ec48f..8f958b477c 100644 --- a/src/USER-OMP/angle_class2_omp.cpp +++ b/src/USER-OMP/angle_class2_omp.cpp @@ -61,6 +61,7 @@ void AngleClass2OMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -77,6 +78,7 @@ void AngleClass2OMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/angle_cosine_delta_omp.cpp b/src/USER-OMP/angle_cosine_delta_omp.cpp index a642694222..f9891dbb3d 100644 --- a/src/USER-OMP/angle_cosine_delta_omp.cpp +++ b/src/USER-OMP/angle_cosine_delta_omp.cpp @@ -61,6 +61,7 @@ void AngleCosineDeltaOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -77,6 +78,7 @@ void AngleCosineDeltaOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/angle_cosine_omp.cpp b/src/USER-OMP/angle_cosine_omp.cpp index 8aaddc528c..39ae3ce698 100644 --- a/src/USER-OMP/angle_cosine_omp.cpp +++ b/src/USER-OMP/angle_cosine_omp.cpp @@ -61,6 +61,7 @@ void AngleCosineOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -77,6 +78,7 @@ void AngleCosineOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/angle_cosine_periodic_omp.cpp b/src/USER-OMP/angle_cosine_periodic_omp.cpp index d63dfd8ed1..311152f06c 100644 --- a/src/USER-OMP/angle_cosine_periodic_omp.cpp +++ b/src/USER-OMP/angle_cosine_periodic_omp.cpp @@ -63,6 +63,7 @@ void AngleCosinePeriodicOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -79,6 +80,7 @@ void AngleCosinePeriodicOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/angle_cosine_shift_exp_omp.cpp b/src/USER-OMP/angle_cosine_shift_exp_omp.cpp index 5abf621050..3a3c31d625 100644 --- a/src/USER-OMP/angle_cosine_shift_exp_omp.cpp +++ b/src/USER-OMP/angle_cosine_shift_exp_omp.cpp @@ -61,6 +61,7 @@ void AngleCosineShiftExpOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -77,6 +78,7 @@ void AngleCosineShiftExpOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/angle_cosine_shift_omp.cpp b/src/USER-OMP/angle_cosine_shift_omp.cpp index 42a7b8d734..f9f538e553 100644 --- a/src/USER-OMP/angle_cosine_shift_omp.cpp +++ b/src/USER-OMP/angle_cosine_shift_omp.cpp @@ -61,6 +61,7 @@ void AngleCosineShiftOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -77,6 +78,7 @@ void AngleCosineShiftOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/angle_cosine_squared_omp.cpp b/src/USER-OMP/angle_cosine_squared_omp.cpp index 20ba91d1d5..c7d14468f5 100644 --- a/src/USER-OMP/angle_cosine_squared_omp.cpp +++ b/src/USER-OMP/angle_cosine_squared_omp.cpp @@ -61,6 +61,7 @@ void AngleCosineSquaredOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -77,6 +78,7 @@ void AngleCosineSquaredOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/angle_dipole_omp.cpp b/src/USER-OMP/angle_dipole_omp.cpp index 4e67801671..cf391b2d60 100644 --- a/src/USER-OMP/angle_dipole_omp.cpp +++ b/src/USER-OMP/angle_dipole_omp.cpp @@ -65,6 +65,7 @@ void AngleDipoleOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -73,6 +74,7 @@ void AngleDipoleOMP::compute(int eflag, int vflag) else eval<0>(ifrom, ito, thr); } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region diff --git a/src/USER-OMP/angle_fourier_omp.cpp b/src/USER-OMP/angle_fourier_omp.cpp index 4d22a70b69..275eab29f1 100644 --- a/src/USER-OMP/angle_fourier_omp.cpp +++ b/src/USER-OMP/angle_fourier_omp.cpp @@ -61,6 +61,7 @@ void AngleFourierOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -77,6 +78,7 @@ void AngleFourierOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/angle_fourier_simple_omp.cpp b/src/USER-OMP/angle_fourier_simple_omp.cpp index 9b27309e6c..5bdbfce05d 100644 --- a/src/USER-OMP/angle_fourier_simple_omp.cpp +++ b/src/USER-OMP/angle_fourier_simple_omp.cpp @@ -61,6 +61,7 @@ void AngleFourierSimpleOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -77,6 +78,7 @@ void AngleFourierSimpleOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/angle_harmonic_omp.cpp b/src/USER-OMP/angle_harmonic_omp.cpp index 69b20f2691..917bc2d5b0 100644 --- a/src/USER-OMP/angle_harmonic_omp.cpp +++ b/src/USER-OMP/angle_harmonic_omp.cpp @@ -61,6 +61,7 @@ void AngleHarmonicOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -77,6 +78,7 @@ void AngleHarmonicOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/angle_quartic_omp.cpp b/src/USER-OMP/angle_quartic_omp.cpp index 26f783db79..70383fd50a 100644 --- a/src/USER-OMP/angle_quartic_omp.cpp +++ b/src/USER-OMP/angle_quartic_omp.cpp @@ -61,6 +61,7 @@ void AngleQuarticOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -77,6 +78,7 @@ void AngleQuarticOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/angle_sdk_omp.cpp b/src/USER-OMP/angle_sdk_omp.cpp index 523f0836c7..9b4a967bf3 100644 --- a/src/USER-OMP/angle_sdk_omp.cpp +++ b/src/USER-OMP/angle_sdk_omp.cpp @@ -63,6 +63,7 @@ void AngleSDKOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -79,6 +80,7 @@ void AngleSDKOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/angle_table_omp.cpp b/src/USER-OMP/angle_table_omp.cpp index b45956d54e..465f4370fc 100644 --- a/src/USER-OMP/angle_table_omp.cpp +++ b/src/USER-OMP/angle_table_omp.cpp @@ -61,6 +61,7 @@ void AngleTableOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -77,6 +78,7 @@ void AngleTableOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/bond_class2_omp.cpp b/src/USER-OMP/bond_class2_omp.cpp index 47e684da1a..69decfb32a 100644 --- a/src/USER-OMP/bond_class2_omp.cpp +++ b/src/USER-OMP/bond_class2_omp.cpp @@ -57,6 +57,7 @@ void BondClass2OMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -73,6 +74,7 @@ void BondClass2OMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/bond_fene_expand_omp.cpp b/src/USER-OMP/bond_fene_expand_omp.cpp index 5a18461420..37cb8b403e 100644 --- a/src/USER-OMP/bond_fene_expand_omp.cpp +++ b/src/USER-OMP/bond_fene_expand_omp.cpp @@ -58,6 +58,7 @@ void BondFENEExpandOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -74,6 +75,7 @@ void BondFENEExpandOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/bond_fene_omp.cpp b/src/USER-OMP/bond_fene_omp.cpp index bd7ed4a593..e92dae999c 100644 --- a/src/USER-OMP/bond_fene_omp.cpp +++ b/src/USER-OMP/bond_fene_omp.cpp @@ -58,6 +58,7 @@ void BondFENEOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -74,6 +75,7 @@ void BondFENEOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/bond_harmonic_omp.cpp b/src/USER-OMP/bond_harmonic_omp.cpp index 7be27fbef5..b62fd53193 100644 --- a/src/USER-OMP/bond_harmonic_omp.cpp +++ b/src/USER-OMP/bond_harmonic_omp.cpp @@ -56,6 +56,7 @@ void BondHarmonicOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -72,6 +73,7 @@ void BondHarmonicOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp b/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp index 1999912fae..db2518e9a9 100644 --- a/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp +++ b/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp @@ -56,6 +56,7 @@ void BondHarmonicShiftCutOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -72,6 +73,7 @@ void BondHarmonicShiftCutOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/bond_harmonic_shift_omp.cpp b/src/USER-OMP/bond_harmonic_shift_omp.cpp index 43498327c8..632db87301 100644 --- a/src/USER-OMP/bond_harmonic_shift_omp.cpp +++ b/src/USER-OMP/bond_harmonic_shift_omp.cpp @@ -56,6 +56,7 @@ void BondHarmonicShiftOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -72,6 +73,7 @@ void BondHarmonicShiftOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/bond_morse_omp.cpp b/src/USER-OMP/bond_morse_omp.cpp index 2cae149e41..d03783920b 100644 --- a/src/USER-OMP/bond_morse_omp.cpp +++ b/src/USER-OMP/bond_morse_omp.cpp @@ -56,6 +56,7 @@ void BondMorseOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -72,6 +73,7 @@ void BondMorseOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/bond_nonlinear_omp.cpp b/src/USER-OMP/bond_nonlinear_omp.cpp index 57d547bb04..2bc77de85c 100644 --- a/src/USER-OMP/bond_nonlinear_omp.cpp +++ b/src/USER-OMP/bond_nonlinear_omp.cpp @@ -56,6 +56,7 @@ void BondNonlinearOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -72,6 +73,7 @@ void BondNonlinearOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/bond_quartic_omp.cpp b/src/USER-OMP/bond_quartic_omp.cpp index 28c16a7458..c0ddfd0b92 100644 --- a/src/USER-OMP/bond_quartic_omp.cpp +++ b/src/USER-OMP/bond_quartic_omp.cpp @@ -62,6 +62,7 @@ void BondQuarticOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -78,6 +79,7 @@ void BondQuarticOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/bond_table_omp.cpp b/src/USER-OMP/bond_table_omp.cpp index 3803eaa02b..53226df4f7 100644 --- a/src/USER-OMP/bond_table_omp.cpp +++ b/src/USER-OMP/bond_table_omp.cpp @@ -56,6 +56,7 @@ void BondTableOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -72,6 +73,7 @@ void BondTableOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/dihedral_charmm_omp.cpp b/src/USER-OMP/dihedral_charmm_omp.cpp index 50ddf5cbbc..311ef73e3b 100644 --- a/src/USER-OMP/dihedral_charmm_omp.cpp +++ b/src/USER-OMP/dihedral_charmm_omp.cpp @@ -67,6 +67,7 @@ void DihedralCharmmOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -83,6 +84,7 @@ void DihedralCharmmOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/dihedral_class2_omp.cpp b/src/USER-OMP/dihedral_class2_omp.cpp index 701f8720cc..b4d1080cea 100644 --- a/src/USER-OMP/dihedral_class2_omp.cpp +++ b/src/USER-OMP/dihedral_class2_omp.cpp @@ -60,6 +60,7 @@ void DihedralClass2OMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -76,6 +77,7 @@ void DihedralClass2OMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp index d17900d1e8..57a4561629 100644 --- a/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp +++ b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp @@ -60,6 +60,7 @@ void DihedralCosineShiftExpOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -76,6 +77,7 @@ void DihedralCosineShiftExpOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/dihedral_fourier_omp.cpp b/src/USER-OMP/dihedral_fourier_omp.cpp index f63f45c9a8..29d18c9fca 100644 --- a/src/USER-OMP/dihedral_fourier_omp.cpp +++ b/src/USER-OMP/dihedral_fourier_omp.cpp @@ -61,6 +61,7 @@ void DihedralFourierOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -77,6 +78,7 @@ void DihedralFourierOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/dihedral_harmonic_omp.cpp b/src/USER-OMP/dihedral_harmonic_omp.cpp index 68134d3c7a..79daf186cb 100644 --- a/src/USER-OMP/dihedral_harmonic_omp.cpp +++ b/src/USER-OMP/dihedral_harmonic_omp.cpp @@ -60,6 +60,7 @@ void DihedralHarmonicOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -76,6 +77,7 @@ void DihedralHarmonicOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/dihedral_helix_omp.cpp b/src/USER-OMP/dihedral_helix_omp.cpp index fdffeecdce..316fbfcb3f 100644 --- a/src/USER-OMP/dihedral_helix_omp.cpp +++ b/src/USER-OMP/dihedral_helix_omp.cpp @@ -63,6 +63,7 @@ void DihedralHelixOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -79,6 +80,7 @@ void DihedralHelixOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/dihedral_multi_harmonic_omp.cpp b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp index 4eabdddb85..e43c75d539 100644 --- a/src/USER-OMP/dihedral_multi_harmonic_omp.cpp +++ b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp @@ -60,6 +60,7 @@ void DihedralMultiHarmonicOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -76,6 +77,7 @@ void DihedralMultiHarmonicOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/dihedral_nharmonic_omp.cpp b/src/USER-OMP/dihedral_nharmonic_omp.cpp index 51a3f1b752..3cf0630736 100644 --- a/src/USER-OMP/dihedral_nharmonic_omp.cpp +++ b/src/USER-OMP/dihedral_nharmonic_omp.cpp @@ -60,6 +60,7 @@ void DihedralNHarmonicOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -76,6 +77,7 @@ void DihedralNHarmonicOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/dihedral_opls_omp.cpp b/src/USER-OMP/dihedral_opls_omp.cpp index 41c9691dae..187bdae3a6 100644 --- a/src/USER-OMP/dihedral_opls_omp.cpp +++ b/src/USER-OMP/dihedral_opls_omp.cpp @@ -61,6 +61,7 @@ void DihedralOPLSOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -77,6 +78,7 @@ void DihedralOPLSOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/dihedral_quadratic_omp.cpp b/src/USER-OMP/dihedral_quadratic_omp.cpp index 2fc72e7317..3157e3b40a 100644 --- a/src/USER-OMP/dihedral_quadratic_omp.cpp +++ b/src/USER-OMP/dihedral_quadratic_omp.cpp @@ -61,6 +61,7 @@ void DihedralQuadraticOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -77,6 +78,7 @@ void DihedralQuadraticOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/dihedral_table_omp.cpp b/src/USER-OMP/dihedral_table_omp.cpp index fdc05e7ba0..1457f7b2bf 100644 --- a/src/USER-OMP/dihedral_table_omp.cpp +++ b/src/USER-OMP/dihedral_table_omp.cpp @@ -128,6 +128,7 @@ void DihedralTableOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -144,6 +145,7 @@ void DihedralTableOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/ewald_omp.cpp b/src/USER-OMP/ewald_omp.cpp index ea74c8c748..6625a1dcb8 100644 --- a/src/USER-OMP/ewald_omp.cpp +++ b/src/USER-OMP/ewald_omp.cpp @@ -118,6 +118,7 @@ void EwaldOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, nlocal, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, 0, NULL, NULL, thr); for (i = ifrom; i < ito; i++) { @@ -205,6 +206,7 @@ void EwaldOMP::compute(int eflag, int vflag) for (j = 0; j < 6; j++) vatom[i][j] *= q[i]*qscale; } + thr->timer(Timer::KSPACE); reduce_thr(this, eflag,vflag,thr); } // end of omp parallel region diff --git a/src/USER-OMP/fix_omp.cpp b/src/USER-OMP/fix_omp.cpp index 6717d04179..4a669addea 100644 --- a/src/USER-OMP/fix_omp.cpp +++ b/src/USER-OMP/fix_omp.cpp @@ -26,6 +26,7 @@ #include "update.h" #include "integrate.h" #include "min.h" +#include "timer.h" #include "fix_omp.h" #include "thr_data.h" @@ -65,7 +66,7 @@ static int get_tid() /* ---------------------------------------------------------------------- */ -FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) +FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), thr(NULL), last_omp_style(NULL), last_pair_hybrid(NULL), _nthr(-1), _neighbor(true), _mixed(false), _reduced(true) @@ -130,7 +131,7 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) fprintf(screen,"using %s neighbor list subroutines\n", nmode); fprintf(screen,"prefer %s precision OpenMP force kernels\n", kmode); } - + if (logfile) { if (reset_thr) fprintf(logfile,"set %d OpenMP thread(s) per MPI task\n", nthreads); @@ -146,11 +147,12 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) thr = new ThrData *[nthreads]; _nthr = nthreads; #if defined(_OPENMP) -#pragma omp parallel default(none) +#pragma omp parallel default(none) shared(lmp) #endif { const int tid = get_tid(); - thr[tid] = new ThrData(tid); + Timer *t = new Timer(lmp); + thr[tid] = new ThrData(tid,t); } } @@ -196,11 +198,17 @@ int FixOMP::setmask() void FixOMP::init() { // USER-OMP package cannot be used with atom_style template - - if (atom->molecular == 2) + if (atom->molecular == 2) error->all(FLERR,"USER-OMP package does not (yet) work with " "atom_style template"); + // reset per thread timer + for (int i=0; i < comm->nthreads; ++i) { + thr[i]->_timer_active=1; + thr[i]->timer(Timer::RESET); + thr[i]->_timer_active=-1; + } + if ((strstr(update->integrate_style,"respa") != NULL) && (strstr(update->integrate_style,"respa/omp") == NULL)) error->all(FLERR,"Need to use respa/omp for r-RESPA with /omp styles"); @@ -272,7 +280,7 @@ void FixOMP::init() CheckStyleForOMP(improper); CheckHybridForOMP(improper,Improper); } - + if (kspace_split >= 0) { CheckStyleForOMP(kspace); } @@ -325,6 +333,15 @@ void FixOMP::set_neighbor_omp() /* ---------------------------------------------------------------------- */ +void FixOMP::setup(int) +{ + // we are post the force compute in setup. turn on timers + for (int i=0; i < comm->nthreads; ++i) + thr[i]->_timer_active=0; +} + +/* ---------------------------------------------------------------------- */ + // adjust size and clear out per thread accumulator arrays void FixOMP::pre_force(int) { diff --git a/src/USER-OMP/fix_omp.h b/src/USER-OMP/fix_omp.h index 656ab752a8..55e042dd52 100644 --- a/src/USER-OMP/fix_omp.h +++ b/src/USER-OMP/fix_omp.h @@ -36,6 +36,8 @@ class FixOMP : public Fix { virtual ~FixOMP(); virtual int setmask(); virtual void init(); + virtual void setup(int); + virtual void min_setup(int flag) { setup(flag); } virtual void pre_force(int); virtual void setup_pre_force(int vflag) { pre_force(vflag); } diff --git a/src/USER-OMP/improper_class2_omp.cpp b/src/USER-OMP/improper_class2_omp.cpp index d9e90218c0..2cf71f675c 100644 --- a/src/USER-OMP/improper_class2_omp.cpp +++ b/src/USER-OMP/improper_class2_omp.cpp @@ -60,6 +60,7 @@ void ImproperClass2OMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -76,6 +77,7 @@ void ImproperClass2OMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/improper_cossq_omp.cpp b/src/USER-OMP/improper_cossq_omp.cpp index bf3dd678f6..745591a338 100644 --- a/src/USER-OMP/improper_cossq_omp.cpp +++ b/src/USER-OMP/improper_cossq_omp.cpp @@ -60,6 +60,7 @@ void ImproperCossqOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -76,6 +77,7 @@ void ImproperCossqOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/improper_cvff_omp.cpp b/src/USER-OMP/improper_cvff_omp.cpp index a082a0d7bc..1d252fee5b 100644 --- a/src/USER-OMP/improper_cvff_omp.cpp +++ b/src/USER-OMP/improper_cvff_omp.cpp @@ -60,6 +60,7 @@ void ImproperCvffOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -76,6 +77,7 @@ void ImproperCvffOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/improper_fourier_omp.cpp b/src/USER-OMP/improper_fourier_omp.cpp index 037eff48c9..49fcef23d4 100644 --- a/src/USER-OMP/improper_fourier_omp.cpp +++ b/src/USER-OMP/improper_fourier_omp.cpp @@ -60,6 +60,7 @@ void ImproperFourierOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -76,6 +77,7 @@ void ImproperFourierOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -115,18 +117,18 @@ void ImproperFourierOMP::eval(int nfrom, int nto, ThrData * const thr) vb3z = x[i4][2] - x[i1][2]; add1_thr<EVFLAG,EFLAG,NEWTON_BOND>(i1,i2,i3,i4,type, - vb1x,vb1y,vb1z, - vb2x,vb2y,vb2z, + vb1x,vb1y,vb1z, + vb2x,vb2y,vb2z, vb3x,vb3y,vb3z,thr); if ( all[type] ) { add1_thr<EVFLAG,EFLAG,NEWTON_BOND>(i1,i4,i2,i3,type, vb3x,vb3y,vb3z, - vb1x,vb1y,vb1z, - vb2x,vb2y,vb2z,thr); + vb1x,vb1y,vb1z, + vb2x,vb2y,vb2z,thr); add1_thr<EVFLAG,EFLAG,NEWTON_BOND>(i1,i3,i4,i2,type, - vb2x,vb2y,vb2z, + vb2x,vb2y,vb2z, vb3x,vb3y,vb3z, - vb1x,vb1y,vb1z,thr); + vb1x,vb1y,vb1z,thr); } } } diff --git a/src/USER-OMP/improper_harmonic_omp.cpp b/src/USER-OMP/improper_harmonic_omp.cpp index 3104322ca8..e323b75e3f 100644 --- a/src/USER-OMP/improper_harmonic_omp.cpp +++ b/src/USER-OMP/improper_harmonic_omp.cpp @@ -60,6 +60,7 @@ void ImproperHarmonicOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -76,6 +77,7 @@ void ImproperHarmonicOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/improper_ring_omp.cpp b/src/USER-OMP/improper_ring_omp.cpp index 1394c8986d..c1737a4f21 100644 --- a/src/USER-OMP/improper_ring_omp.cpp +++ b/src/USER-OMP/improper_ring_omp.cpp @@ -62,6 +62,7 @@ void ImproperRingOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -77,6 +78,7 @@ void ImproperRingOMP::compute(int eflag, int vflag) if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } } // end of omp parallel region diff --git a/src/USER-OMP/improper_umbrella_omp.cpp b/src/USER-OMP/improper_umbrella_omp.cpp index e0cdeaf3fc..689dbdfe65 100644 --- a/src/USER-OMP/improper_umbrella_omp.cpp +++ b/src/USER-OMP/improper_umbrella_omp.cpp @@ -60,6 +60,7 @@ void ImproperUmbrellaOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (inum > 0) { @@ -76,6 +77,7 @@ void ImproperUmbrellaOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } } + thr->timer(Timer::BOND); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/msm_cg_omp.cpp b/src/USER-OMP/msm_cg_omp.cpp index 0af044e633..26926ab775 100644 --- a/src/USER-OMP/msm_cg_omp.cpp +++ b/src/USER-OMP/msm_cg_omp.cpp @@ -312,6 +312,7 @@ void MSMCGOMP::compute(int eflag, int vflag) const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/msm_omp.cpp b/src/USER-OMP/msm_omp.cpp index a29fa43f52..7a79dc865b 100755 --- a/src/USER-OMP/msm_omp.cpp +++ b/src/USER-OMP/msm_omp.cpp @@ -66,6 +66,7 @@ void MSMOMP::compute(int eflag, int vflag) const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -74,12 +75,12 @@ void MSMOMP::compute(int eflag, int vflag) MSM direct part procedure for intermediate grid levels ------------------------------------------------------------------------- */ -void MSMOMP::direct(int n) +void MSMOMP::direct(int n) { // zero out electric potential memset(&(egrid[n][nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double)); - + // zero out virial if (vflag_atom) { @@ -169,6 +170,8 @@ void MSMOMP::direct_eval(const int nn) int i,ifrom,ito,tid,icx,icy,icz,ix,iy,iz,k; loop_setup_thr(ifrom, ito, tid, inum, comm->nthreads); + ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); for (i = ifrom; i < ito; ++i) { @@ -298,6 +301,7 @@ void MSMOMP::direct_eval(const int nn) } } } + thr->timer(Timer::KSPACE); } // end of omp parallel region if (EFLAG_GLOBAL || VFLAG_GLOBAL) { diff --git a/src/USER-OMP/neighbor_omp.h b/src/USER-OMP/neighbor_omp.h index 2b2ad24fe5..53726109e8 100644 --- a/src/USER-OMP/neighbor_omp.h +++ b/src/USER-OMP/neighbor_omp.h @@ -18,14 +18,20 @@ #include <omp.h> #endif +#include "modify.h" +#include "timer.h" +#include "fix_omp.h" +#include "thr_data.h" + namespace LAMMPS_NS { // these macros hide some ugly and redundant OpenMP related stuff #if defined(_OPENMP) // make sure we have at least one page for each thread -#define NEIGH_OMP_INIT \ - const int nthreads = comm->nthreads; +#define NEIGH_OMP_INIT \ + const int nthreads = comm->nthreads; \ + const int ifix = modify->find_fix("package_omp") // get thread id and then assign each thread a fixed chunk of atoms #define NEIGH_OMP_SETUP(num) \ @@ -34,14 +40,18 @@ namespace LAMMPS_NS { const int idelta = 1 + num/nthreads; \ const int ifrom = tid*idelta; \ const int ito = ((ifrom + idelta) > num) \ - ? num : (ifrom+idelta); + ? num : (ifrom+idelta); \ + FixOMP *fix = static_cast<FixOMP *>(modify->fix[ifix]); \ + ThrData *thr = fix->get_thr(tid); \ + thr->timer(Timer::START); -#define NEIGH_OMP_CLOSE } +#define NEIGH_OMP_CLOSE \ + thr->timer(Timer::NEIGH); \ + } #else /* !defined(_OPENMP) */ -#define NEIGH_OMP_INIT \ - const int nthreads = comm->nthreads; +#define NEIGH_OMP_INIT #define NEIGH_OMP_SETUP(num) \ const int tid = 0; \ diff --git a/src/USER-OMP/pair_adp_omp.cpp b/src/USER-OMP/pair_adp_omp.cpp index 41be063200..ec63dead89 100644 --- a/src/USER-OMP/pair_adp_omp.cpp +++ b/src/USER-OMP/pair_adp_omp.cpp @@ -71,6 +71,7 @@ void PairADPOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (force->newton_pair) @@ -91,6 +92,7 @@ void PairADPOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -200,6 +202,7 @@ void PairADPOMP::eval(int iifrom, int iito, ThrData * const thr) if (NEWTON_PAIR) { // reduce per thread density + thr->timer(Timer::PAIR); data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid); data_reduce_thr(&(mu[0][0]), nall, comm->nthreads, 3, tid); data_reduce_thr(&(lambda[0][0]), nall, comm->nthreads, 6, tid); @@ -217,6 +220,7 @@ void PairADPOMP::eval(int iifrom, int iito, ThrData * const thr) } else { // reduce per thread density + thr->timer(Timer::PAIR); data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid); data_reduce_thr(&(mu[0][0]), nlocal, comm->nthreads, 3, tid); data_reduce_thr(&(lambda[0][0]), nlocal, comm->nthreads, 6, tid); diff --git a/src/USER-OMP/pair_airebo_omp.cpp b/src/USER-OMP/pair_airebo_omp.cpp index 5907460b7f..66c56b859a 100644 --- a/src/USER-OMP/pair_airebo_omp.cpp +++ b/src/USER-OMP/pair_airebo_omp.cpp @@ -65,12 +65,14 @@ void PairAIREBOOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); FREBO_thr(ifrom,ito,evflag,eflag,vflag_atom,thr); if (ljflag) FLJ_thr(ifrom,ito,evflag,eflag,vflag_atom,thr); if (torflag) TORSION_thr(ifrom,ito,evflag,eflag,thr); + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_beck_omp.cpp b/src/USER-OMP/pair_beck_omp.cpp index d314ab3f1d..3e261aa2fd 100644 --- a/src/USER-OMP/pair_beck_omp.cpp +++ b/src/USER-OMP/pair_beck_omp.cpp @@ -54,6 +54,7 @@ void PairBeckOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -69,6 +70,7 @@ void PairBeckOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_born_coul_long_omp.cpp b/src/USER-OMP/pair_born_coul_long_omp.cpp index 099fadd98b..06e6da9d2c 100644 --- a/src/USER-OMP/pair_born_coul_long_omp.cpp +++ b/src/USER-OMP/pair_born_coul_long_omp.cpp @@ -60,6 +60,7 @@ void PairBornCoulLongOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -75,6 +76,7 @@ void PairBornCoulLongOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_born_coul_msm_omp.cpp b/src/USER-OMP/pair_born_coul_msm_omp.cpp index 5ee4c1e376..bd282cfaa9 100755 --- a/src/USER-OMP/pair_born_coul_msm_omp.cpp +++ b/src/USER-OMP/pair_born_coul_msm_omp.cpp @@ -57,6 +57,7 @@ void PairBornCoulMSMOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -72,6 +73,7 @@ void PairBornCoulMSMOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_born_coul_wolf_omp.cpp b/src/USER-OMP/pair_born_coul_wolf_omp.cpp index 7c06087179..9091332080 100644 --- a/src/USER-OMP/pair_born_coul_wolf_omp.cpp +++ b/src/USER-OMP/pair_born_coul_wolf_omp.cpp @@ -54,6 +54,7 @@ void PairBornCoulWolfOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -69,6 +70,7 @@ void PairBornCoulWolfOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_born_omp.cpp b/src/USER-OMP/pair_born_omp.cpp index e17b2fbe94..01fdc1d012 100644 --- a/src/USER-OMP/pair_born_omp.cpp +++ b/src/USER-OMP/pair_born_omp.cpp @@ -52,6 +52,7 @@ void PairBornOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairBornOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_brownian_omp.cpp b/src/USER-OMP/pair_brownian_omp.cpp index 15a2c25491..56d9dee308 100644 --- a/src/USER-OMP/pair_brownian_omp.cpp +++ b/src/USER-OMP/pair_brownian_omp.cpp @@ -132,6 +132,7 @@ void PairBrownianOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); // generate a random number generator instance for @@ -157,6 +158,7 @@ void PairBrownianOMP::compute(int eflag, int vflag) } } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_brownian_poly_omp.cpp b/src/USER-OMP/pair_brownian_poly_omp.cpp index 09a7ef2e59..0ac97a935b 100644 --- a/src/USER-OMP/pair_brownian_poly_omp.cpp +++ b/src/USER-OMP/pair_brownian_poly_omp.cpp @@ -132,6 +132,7 @@ void PairBrownianPolyOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); // generate a random number generator instance for @@ -150,6 +151,7 @@ void PairBrownianPolyOMP::compute(int eflag, int vflag) else eval<0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_buck_coul_cut_omp.cpp b/src/USER-OMP/pair_buck_coul_cut_omp.cpp index 9590fcea6d..054493cc01 100644 --- a/src/USER-OMP/pair_buck_coul_cut_omp.cpp +++ b/src/USER-OMP/pair_buck_coul_cut_omp.cpp @@ -52,6 +52,7 @@ void PairBuckCoulCutOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -66,6 +67,7 @@ void PairBuckCoulCutOMP::compute(int eflag, int vflag) if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_buck_coul_long_omp.cpp b/src/USER-OMP/pair_buck_coul_long_omp.cpp index 6c52768ce7..6ab1a851b7 100644 --- a/src/USER-OMP/pair_buck_coul_long_omp.cpp +++ b/src/USER-OMP/pair_buck_coul_long_omp.cpp @@ -60,6 +60,7 @@ void PairBuckCoulLongOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -76,6 +77,7 @@ void PairBuckCoulLongOMP::compute(int eflag, int vflag) } // reduce per thread forces into global force array. + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_buck_coul_msm_omp.cpp b/src/USER-OMP/pair_buck_coul_msm_omp.cpp index 129eca130b..bd59dccf7d 100755 --- a/src/USER-OMP/pair_buck_coul_msm_omp.cpp +++ b/src/USER-OMP/pair_buck_coul_msm_omp.cpp @@ -57,6 +57,7 @@ void PairBuckCoulMSMOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -73,6 +74,7 @@ void PairBuckCoulMSMOMP::compute(int eflag, int vflag) } // reduce per thread forces into global force array. + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_buck_long_coul_long_omp.cpp b/src/USER-OMP/pair_buck_long_coul_long_omp.cpp index 5f5570543b..2443b0a61d 100644 --- a/src/USER-OMP/pair_buck_long_coul_long_omp.cpp +++ b/src/USER-OMP/pair_buck_long_coul_long_omp.cpp @@ -64,6 +64,7 @@ void PairBuckLongCoulLongOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (order6) { @@ -302,8 +303,9 @@ void PairBuckLongCoulLongOMP::compute(int eflag, int vflag) } } } - } + } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -326,8 +328,10 @@ void PairBuckLongCoulLongOMP::compute_inner() loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(0, 0, nall, 0, 0, thr); eval_inner(ifrom, ito, thr); + thr->timer(Timer::PAIR); } // end of omp parallel region } @@ -349,8 +353,10 @@ void PairBuckLongCoulLongOMP::compute_middle() loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(0, 0, nall, 0, 0, thr); eval_middle(ifrom, ito, thr); + thr->timer(Timer::PAIR); } // end of omp parallel region } @@ -377,6 +383,7 @@ void PairBuckLongCoulLongOMP::compute_outer(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (order6) { @@ -615,8 +622,9 @@ void PairBuckLongCoulLongOMP::compute_outer(int eflag, int vflag) } } } - } + } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_buck_omp.cpp b/src/USER-OMP/pair_buck_omp.cpp index f4a014be69..0b8dbc7f68 100644 --- a/src/USER-OMP/pair_buck_omp.cpp +++ b/src/USER-OMP/pair_buck_omp.cpp @@ -52,6 +52,7 @@ void PairBuckOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairBuckOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_cdeam_omp.cpp b/src/USER-OMP/pair_cdeam_omp.cpp index 828cf53251..cb1f201a9e 100644 --- a/src/USER-OMP/pair_cdeam_omp.cpp +++ b/src/USER-OMP/pair_cdeam_omp.cpp @@ -86,6 +86,7 @@ void PairCDEAMOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (force->newton_pair) @@ -128,12 +129,15 @@ void PairCDEAMOMP::compute(int eflag, int vflag) break; default: + { #if defined(_OPENMP) #pragma omp master #endif - error->all(FLERR,"unsupported eam/cd pair style variant"); + error->all(FLERR,"unsupported eam/cd pair style variant"); + } } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -227,6 +231,7 @@ void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr) if (NEWTON_PAIR) { // reduce per thread density + thr->timer(Timer::PAIR); data_reduce_thr(rho, nall, nthreads, 1, tid); data_reduce_thr(rhoB, nall, nthreads, 1, tid); if (CDEAMVERSION==1) @@ -246,6 +251,7 @@ void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr) } else { // reduce per thread density + thr->timer(Timer::PAIR); data_reduce_thr(rho, nlocal, nthreads, 1, tid); data_reduce_thr(rhoB, nlocal, nthreads, 1, tid); if (CDEAMVERSION==1) @@ -346,6 +352,7 @@ void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr) } if (NEWTON_PAIR) { + thr->timer(Timer::PAIR); data_reduce_thr(D_values, nall, nthreads, 1, tid); // wait until reduction is complete @@ -361,6 +368,7 @@ void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr) sync_threads(); } else { + thr->timer(Timer::PAIR); data_reduce_thr(D_values, nlocal, nthreads, 1, tid); // wait until reduction is complete @@ -410,7 +418,9 @@ void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr) D_i = D_values[i] * h_prime_i / (2.0 * rho[i] * rho[i]); } else if(CDEAMVERSION == 2) { D_i = D_values[i]; - } else ASSERT(false); + } else { + ASSERT(false); + } } for(jj = 0; jj < jnum; jj++) { @@ -456,8 +466,9 @@ void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr) D_j = D_values[j] * h_prime_j / (2.0 * rho[j] * rho[j]); } else if(CDEAMVERSION == 2) { D_j = D_values[j]; - } else ASSERT(false); - + } else { + ASSERT(false); + } double t2 = -rhoB[j]; if(itype == speciesB) t2 += rho[j]; fpair += D_j * rhoip * t2; @@ -491,8 +502,9 @@ void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr) double x_ij = 0.5 * (x_i + x_j); // Calculate h(x_ij) polynomial function. h = evalH(x_ij); - } else ASSERT(false); - + } else { + ASSERT(false); + } fpair += h * phip; phi *= h; } diff --git a/src/USER-OMP/pair_colloid_omp.cpp b/src/USER-OMP/pair_colloid_omp.cpp index 7131d8c541..e3b6472c2a 100644 --- a/src/USER-OMP/pair_colloid_omp.cpp +++ b/src/USER-OMP/pair_colloid_omp.cpp @@ -55,6 +55,7 @@ void PairColloidOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -70,6 +71,7 @@ void PairColloidOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_comb_omp.cpp b/src/USER-OMP/pair_comb_omp.cpp index 37237fb162..a075ee1f6a 100644 --- a/src/USER-OMP/pair_comb_omp.cpp +++ b/src/USER-OMP/pair_comb_omp.cpp @@ -61,6 +61,7 @@ void PairCombOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -73,6 +74,7 @@ void PairCombOMP::compute(int eflag, int vflag) } } else eval<0,0,0>(ifrom, ito, thr); + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_coul_cut_omp.cpp b/src/USER-OMP/pair_coul_cut_omp.cpp index db5c341f00..1478618682 100644 --- a/src/USER-OMP/pair_coul_cut_omp.cpp +++ b/src/USER-OMP/pair_coul_cut_omp.cpp @@ -52,6 +52,7 @@ void PairCoulCutOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairCoulCutOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_coul_cut_soft_omp.cpp b/src/USER-OMP/pair_coul_cut_soft_omp.cpp index cb0eb7ae6e..da6760902e 100644 --- a/src/USER-OMP/pair_coul_cut_soft_omp.cpp +++ b/src/USER-OMP/pair_coul_cut_soft_omp.cpp @@ -52,6 +52,7 @@ void PairCoulCutSoftOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairCoulCutSoftOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_coul_debye_omp.cpp b/src/USER-OMP/pair_coul_debye_omp.cpp index d4a2538420..47d3dab99e 100644 --- a/src/USER-OMP/pair_coul_debye_omp.cpp +++ b/src/USER-OMP/pair_coul_debye_omp.cpp @@ -52,6 +52,7 @@ void PairCoulDebyeOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairCoulDebyeOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_coul_diel_omp.cpp b/src/USER-OMP/pair_coul_diel_omp.cpp index 8a02be5cb3..b100c5053b 100644 --- a/src/USER-OMP/pair_coul_diel_omp.cpp +++ b/src/USER-OMP/pair_coul_diel_omp.cpp @@ -52,6 +52,7 @@ void PairCoulDielOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairCoulDielOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_coul_dsf_omp.cpp b/src/USER-OMP/pair_coul_dsf_omp.cpp index 846be4ceaa..c85ede20ee 100644 --- a/src/USER-OMP/pair_coul_dsf_omp.cpp +++ b/src/USER-OMP/pair_coul_dsf_omp.cpp @@ -62,6 +62,7 @@ void PairCoulDSFOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -77,6 +78,7 @@ void PairCoulDSFOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_coul_long_omp.cpp b/src/USER-OMP/pair_coul_long_omp.cpp index 069fb4d186..d77a88198b 100644 --- a/src/USER-OMP/pair_coul_long_omp.cpp +++ b/src/USER-OMP/pair_coul_long_omp.cpp @@ -61,6 +61,7 @@ void PairCoulLongOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -76,6 +77,7 @@ void PairCoulLongOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_coul_long_soft_omp.cpp b/src/USER-OMP/pair_coul_long_soft_omp.cpp index ef06bd323f..1246bb10ae 100644 --- a/src/USER-OMP/pair_coul_long_soft_omp.cpp +++ b/src/USER-OMP/pair_coul_long_soft_omp.cpp @@ -60,6 +60,7 @@ void PairCoulLongSoftOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -75,6 +76,7 @@ void PairCoulLongSoftOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_coul_msm_omp.cpp b/src/USER-OMP/pair_coul_msm_omp.cpp index 1af06208ef..870afb7806 100755 --- a/src/USER-OMP/pair_coul_msm_omp.cpp +++ b/src/USER-OMP/pair_coul_msm_omp.cpp @@ -58,6 +58,7 @@ void PairCoulMSMOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -73,6 +74,7 @@ void PairCoulMSMOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_coul_wolf_omp.cpp b/src/USER-OMP/pair_coul_wolf_omp.cpp index a9ad976e4b..bcc19af0fe 100644 --- a/src/USER-OMP/pair_coul_wolf_omp.cpp +++ b/src/USER-OMP/pair_coul_wolf_omp.cpp @@ -54,6 +54,7 @@ void PairCoulWolfOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -69,6 +70,7 @@ void PairCoulWolfOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_dpd_omp.cpp b/src/USER-OMP/pair_dpd_omp.cpp index d09a2fbdae..070c2dc4c2 100644 --- a/src/USER-OMP/pair_dpd_omp.cpp +++ b/src/USER-OMP/pair_dpd_omp.cpp @@ -77,6 +77,7 @@ void PairDPDOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); // generate a random number generator instance for @@ -98,6 +99,7 @@ void PairDPDOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_dpd_tstat_omp.cpp b/src/USER-OMP/pair_dpd_tstat_omp.cpp index 26d14ec8aa..fc725e8cf5 100644 --- a/src/USER-OMP/pair_dpd_tstat_omp.cpp +++ b/src/USER-OMP/pair_dpd_tstat_omp.cpp @@ -77,6 +77,7 @@ void PairDPDTstatOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); // generate a random number generator instance for @@ -98,6 +99,7 @@ void PairDPDTstatOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_eam_omp.cpp b/src/USER-OMP/pair_eam_omp.cpp index 808353b881..cff4daf5ea 100644 --- a/src/USER-OMP/pair_eam_omp.cpp +++ b/src/USER-OMP/pair_eam_omp.cpp @@ -66,6 +66,7 @@ void PairEAMOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (force->newton_pair) @@ -86,6 +87,7 @@ void PairEAMOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -162,6 +164,7 @@ void PairEAMOMP::eval(int iifrom, int iito, ThrData * const thr) if (NEWTON_PAIR) { // reduce per thread density + thr->timer(Timer::PAIR); data_reduce_thr(rho, nall, nthreads, 1, tid); // wait until reduction is complete @@ -176,6 +179,7 @@ void PairEAMOMP::eval(int iifrom, int iito, ThrData * const thr) sync_threads(); } else { + thr->timer(Timer::PAIR); data_reduce_thr(rho, nlocal, nthreads, 1, tid); // wait until reduction is complete diff --git a/src/USER-OMP/pair_edip_omp.cpp b/src/USER-OMP/pair_edip_omp.cpp index 50ebaae31a..f86d8ebcbc 100644 --- a/src/USER-OMP/pair_edip_omp.cpp +++ b/src/USER-OMP/pair_edip_omp.cpp @@ -59,6 +59,7 @@ void PairEDIPOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -71,6 +72,7 @@ void PairEDIPOMP::compute(int eflag, int vflag) } } else eval<0,0,0>(ifrom, ito, thr); + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_eim_omp.cpp b/src/USER-OMP/pair_eim_omp.cpp index 916bc99380..501c282f79 100644 --- a/src/USER-OMP/pair_eim_omp.cpp +++ b/src/USER-OMP/pair_eim_omp.cpp @@ -66,6 +66,7 @@ void PairEIMOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (force->newton_pair) @@ -86,6 +87,7 @@ void PairEIMOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -162,6 +164,7 @@ void PairEIMOMP::eval(int iifrom, int iito, ThrData * const thr) // communicate and sum densities if (NEWTON_PAIR) { // reduce per thread density + thr->timer(Timer::PAIR); data_reduce_thr(rho, nall, nthreads, 1, tid); // wait until reduction is complete @@ -176,6 +179,7 @@ void PairEIMOMP::eval(int iifrom, int iito, ThrData * const thr) } } else { + thr->timer(Timer::PAIR); data_reduce_thr(rho, nlocal, nthreads, 1, tid); // wait until reduction is complete @@ -234,6 +238,7 @@ void PairEIMOMP::eval(int iifrom, int iito, ThrData * const thr) // communicate and sum modified densities if (NEWTON_PAIR) { // reduce per thread density + thr->timer(Timer::PAIR); data_reduce_thr(fp, nall, nthreads, 1, tid); // wait until reduction is complete @@ -248,6 +253,7 @@ void PairEIMOMP::eval(int iifrom, int iito, ThrData * const thr) } } else { + thr->timer(Timer::PAIR); data_reduce_thr(fp, nlocal, nthreads, 1, tid); // wait until reduction is complete diff --git a/src/USER-OMP/pair_gauss_cut_omp.cpp b/src/USER-OMP/pair_gauss_cut_omp.cpp index cb0197c965..e80768d5af 100644 --- a/src/USER-OMP/pair_gauss_cut_omp.cpp +++ b/src/USER-OMP/pair_gauss_cut_omp.cpp @@ -52,6 +52,7 @@ void PairGaussCutOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairGaussCutOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_gauss_omp.cpp b/src/USER-OMP/pair_gauss_omp.cpp index 5255b6ea3d..4d8b477465 100644 --- a/src/USER-OMP/pair_gauss_omp.cpp +++ b/src/USER-OMP/pair_gauss_omp.cpp @@ -54,6 +54,7 @@ void PairGaussOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -69,6 +70,7 @@ void PairGaussOMP::compute(int eflag, int vflag) else occ = eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region diff --git a/src/USER-OMP/pair_gayberne_omp.cpp b/src/USER-OMP/pair_gayberne_omp.cpp index ed5cc92701..031b1d4ddd 100644 --- a/src/USER-OMP/pair_gayberne_omp.cpp +++ b/src/USER-OMP/pair_gayberne_omp.cpp @@ -54,6 +54,7 @@ void PairGayBerneOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -69,6 +70,7 @@ void PairGayBerneOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.cpp b/src/USER-OMP/pair_gran_hertz_history_omp.cpp index e6c0063e3a..d684fde19d 100644 --- a/src/USER-OMP/pair_gran_hertz_history_omp.cpp +++ b/src/USER-OMP/pair_gran_hertz_history_omp.cpp @@ -77,6 +77,7 @@ void PairGranHertzHistoryOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) @@ -86,6 +87,7 @@ void PairGranHertzHistoryOMP::compute(int eflag, int vflag) if (shearupdate) eval<0,1>(ifrom, ito, thr); else eval<0,0>(ifrom, ito, thr); + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.cpp b/src/USER-OMP/pair_gran_hooke_history_omp.cpp index 5a229afc64..3da16dea16 100644 --- a/src/USER-OMP/pair_gran_hooke_history_omp.cpp +++ b/src/USER-OMP/pair_gran_hooke_history_omp.cpp @@ -79,6 +79,7 @@ void PairGranHookeHistoryOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) @@ -88,6 +89,7 @@ void PairGranHookeHistoryOMP::compute(int eflag, int vflag) if (shearupdate) eval<0,1>(ifrom, ito, thr); else eval<0,0>(ifrom, ito, thr); + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -95,7 +97,7 @@ void PairGranHookeHistoryOMP::compute(int eflag, int vflag) template <int EVFLAG, int SHEARUPDATE> void PairGranHookeHistoryOMP::eval(int iifrom, int iito, ThrData * const thr) { - int i,j,ii,jj,jnum,itype,jtype; + int i,j,ii,jj,jnum; double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz; double myshear[3]; double radi,radj,radsum,rsq,r,rinv,rsqinv; diff --git a/src/USER-OMP/pair_gran_hooke_omp.cpp b/src/USER-OMP/pair_gran_hooke_omp.cpp index c99dfe6c27..c4e792708f 100644 --- a/src/USER-OMP/pair_gran_hooke_omp.cpp +++ b/src/USER-OMP/pair_gran_hooke_omp.cpp @@ -74,6 +74,7 @@ void PairGranHookeOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) @@ -83,6 +84,7 @@ void PairGranHookeOMP::compute(int eflag, int vflag) if (force->newton_pair) eval<0,1>(ifrom, ito, thr); else eval<0,0>(ifrom, ito, thr); + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -90,7 +92,7 @@ void PairGranHookeOMP::compute(int eflag, int vflag) template <int EVFLAG, int NEWTON_PAIR> void PairGranHookeOMP::eval(int iifrom, int iito, ThrData * const thr) { - int i,j,ii,jj,jnum,itype,jtype; + int i,j,ii,jj,jnum; double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz; double radi,radj,radsum,rsq,r,rinv,rsqinv; double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; diff --git a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp index 9dd82ae253..26c456c170 100644 --- a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp +++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp @@ -83,6 +83,7 @@ void PairHbondDreidingLJOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -98,6 +99,7 @@ void PairHbondDreidingLJOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp index 9efb5d63d2..557852fb2d 100644 --- a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp +++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp @@ -83,6 +83,7 @@ void PairHbondDreidingMorseOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -98,6 +99,7 @@ void PairHbondDreidingMorseOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region diff --git a/src/USER-OMP/pair_line_lj_omp.cpp b/src/USER-OMP/pair_line_lj_omp.cpp index 27c630166e..8a9cf0b0d2 100644 --- a/src/USER-OMP/pair_line_lj_omp.cpp +++ b/src/USER-OMP/pair_line_lj_omp.cpp @@ -79,6 +79,7 @@ void PairLineLJOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -94,6 +95,7 @@ void PairLineLJOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj96_cut_omp.cpp b/src/USER-OMP/pair_lj96_cut_omp.cpp index 13a2252298..451d876a48 100644 --- a/src/USER-OMP/pair_lj96_cut_omp.cpp +++ b/src/USER-OMP/pair_lj96_cut_omp.cpp @@ -53,6 +53,7 @@ void PairLJ96CutOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -68,6 +69,7 @@ void PairLJ96CutOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp index e30052ddb2..b8f9d68807 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp @@ -52,6 +52,7 @@ void PairLJCharmmCoulCharmmImplicitOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairLJCharmmCoulCharmmImplicitOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp index 8aff427b74..35f3867c9d 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp @@ -52,6 +52,7 @@ void PairLJCharmmCoulCharmmOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairLJCharmmCoulCharmmOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp index 41fd5e74cc..caaeb8f916 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp +++ b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp @@ -53,6 +53,7 @@ void PairLJCharmmCoulLongOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -68,6 +69,7 @@ void PairLJCharmmCoulLongOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_soft_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_long_soft_omp.cpp index 9015161359..a1ccbffab4 100644 --- a/src/USER-OMP/pair_lj_charmm_coul_long_soft_omp.cpp +++ b/src/USER-OMP/pair_lj_charmm_coul_long_soft_omp.cpp @@ -53,6 +53,7 @@ void PairLJCharmmCoulLongSoftOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -68,6 +69,7 @@ void PairLJCharmmCoulLongSoftOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -127,7 +129,6 @@ void PairLJCharmmCoulLongSoftOMP::eval(int iifrom, int iito, ThrData * const thr const int jtype = type[j]; if (rsq < cutsq[itype][jtype]) { - const double r2inv = 1.0/rsq; if (rsq < cut_coulsq) { const double A1 = 0.254829592; diff --git a/src/USER-OMP/pair_lj_charmm_coul_msm_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_msm_omp.cpp index f13515283a..2fcfe213a4 100755 --- a/src/USER-OMP/pair_lj_charmm_coul_msm_omp.cpp +++ b/src/USER-OMP/pair_lj_charmm_coul_msm_omp.cpp @@ -58,6 +58,7 @@ void PairLJCharmmCoulMSMOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -73,6 +74,7 @@ void PairLJCharmmCoulMSMOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -134,9 +136,9 @@ void PairLJCharmmCoulMSMOMP::eval(int iifrom, int iito, ThrData * const thr) const double r = sqrt(rsq); const double prefactor = qqrd2e * qtmp*q[j]/r; - const double egamma = 1.0 - (r/cut_coul)*force->kspace->gamma(r/cut_coul); - const double fgamma = 1.0 + (rsq/cut_coulsq)*force->kspace->dgamma(r/cut_coul); - forcecoul = prefactor * (fgamma - 1.0); + const double egamma = 1.0 - (r/cut_coul)*force->kspace->gamma(r/cut_coul); + const double fgamma = 1.0 + (rsq/cut_coulsq)*force->kspace->dgamma(r/cut_coul); + forcecoul = prefactor * (fgamma - 1.0); if (EFLAG) ecoul = prefactor*egamma; if (sbindex) { diff --git a/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp index e51026ecb0..1ba825ab8e 100644 --- a/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp @@ -52,6 +52,7 @@ void PairLJClass2CoulCutOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairLJClass2CoulCutOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp index 08efafa324..6b535a59a5 100644 --- a/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp +++ b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp @@ -60,6 +60,7 @@ void PairLJClass2CoulLongOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -75,6 +76,7 @@ void PairLJClass2CoulLongOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_class2_omp.cpp b/src/USER-OMP/pair_lj_class2_omp.cpp index a2b88699e8..8c920ab9a8 100644 --- a/src/USER-OMP/pair_lj_class2_omp.cpp +++ b/src/USER-OMP/pair_lj_class2_omp.cpp @@ -52,6 +52,7 @@ void PairLJClass2OMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairLJClass2OMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_cubic_omp.cpp b/src/USER-OMP/pair_lj_cubic_omp.cpp index 124bc87899..2caf4620a3 100644 --- a/src/USER-OMP/pair_lj_cubic_omp.cpp +++ b/src/USER-OMP/pair_lj_cubic_omp.cpp @@ -53,6 +53,7 @@ void PairLJCubicOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -68,6 +69,7 @@ void PairLJCubicOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp index 3cbb2977c0..49c7ba128b 100644 --- a/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp @@ -52,6 +52,7 @@ void PairLJCutCoulCutOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairLJCutCoulCutOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_soft_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_cut_soft_omp.cpp index ba4be2aca0..f6db06a51c 100644 --- a/src/USER-OMP/pair_lj_cut_coul_cut_soft_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_cut_soft_omp.cpp @@ -52,6 +52,7 @@ void PairLJCutCoulCutSoftOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairLJCutCoulCutSoftOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp index 28745e89c7..405755ecbb 100644 --- a/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp @@ -52,6 +52,7 @@ void PairLJCutCoulDebyeOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairLJCutCoulDebyeOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp index c7ffc8ef89..f33ea97e9c 100644 --- a/src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp @@ -62,6 +62,7 @@ void PairLJCutCoulDSFOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -77,6 +78,7 @@ void PairLJCutCoulDSFOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -156,7 +158,6 @@ void PairLJCutCoulDSFOMP::eval(int iifrom, int iito, ThrData * const thr) forcecoul = prefactor * (erfcc/r + 2.0*alpha/MY_PIS * erfcd + r*f_shift) * r; if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; - fpair = forcecoul * r2inv; } else forcecoul = 0.0; fpair = (forcecoul + factor_lj*forcelj) * r2inv; diff --git a/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp index 6061374bba..842b84270c 100644 --- a/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp @@ -61,6 +61,7 @@ void PairLJCutCoulLongOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -76,6 +77,7 @@ void PairLJCutCoulLongOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_cut_coul_long_soft_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_soft_omp.cpp index d23ba2fa5c..cdfd4a4c2d 100644 --- a/src/USER-OMP/pair_lj_cut_coul_long_soft_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_long_soft_omp.cpp @@ -61,6 +61,7 @@ void PairLJCutCoulLongSoftOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -76,6 +77,7 @@ void PairLJCutCoulLongSoftOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_cut_coul_msm_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_msm_omp.cpp index c355d4aff2..4b53b7784b 100755 --- a/src/USER-OMP/pair_lj_cut_coul_msm_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_msm_omp.cpp @@ -58,6 +58,7 @@ void PairLJCutCoulMSMOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -73,6 +74,7 @@ void PairLJCutCoulMSMOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp index c7ec43af52..8c22961c1e 100755 --- a/src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp @@ -52,6 +52,7 @@ void PairLJCutDipoleCutOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairLJCutDipoleCutOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_omp.cpp index 4d9f6af81f..5804e41119 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_omp.cpp @@ -53,6 +53,7 @@ void PairLJCutOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,7 +68,7 @@ void PairLJCutOMP::compute(int eflag, int vflag) if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); else eval<0,0,0>(ifrom, ito, thr); } - + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_cut_soft_omp.cpp b/src/USER-OMP/pair_lj_cut_soft_omp.cpp index 953ebea366..92f0cc339f 100644 --- a/src/USER-OMP/pair_lj_cut_soft_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_soft_omp.cpp @@ -53,6 +53,7 @@ void PairLJCutSoftOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairLJCutSoftOMP::compute(int eflag, int vflag) if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_cut_tip4p_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_tip4p_cut_omp.cpp index 92ab9598b6..5bc0165120 100644 --- a/src/USER-OMP/pair_lj_cut_tip4p_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_tip4p_cut_omp.cpp @@ -52,7 +52,7 @@ PairLJCutTIP4PCutOMP::PairLJCutTIP4PCutOMP(LAMMPS *lmp) : /* ---------------------------------------------------------------------- */ -PairLJCutTIP4PCutOMP::~PairLJCutTIP4PCutOMP() +PairLJCutTIP4PCutOMP::~PairLJCutTIP4PCutOMP() { memory->destroy(hneigh_thr); memory->destroy(newsite_thr); @@ -101,6 +101,7 @@ void PairLJCutTIP4PCutOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -113,6 +114,7 @@ void PairLJCutTIP4PCutOMP::compute(int eflag, int vflag) } } else eval<0,0,0>(ifrom, ito, thr); + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp b/src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp index 5dfd75421f..3646c0a2e8 100644 --- a/src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp @@ -52,7 +52,7 @@ PairLJCutTIP4PLongOMP::PairLJCutTIP4PLongOMP(LAMMPS *lmp) : /* ---------------------------------------------------------------------- */ -PairLJCutTIP4PLongOMP::~PairLJCutTIP4PLongOMP() +PairLJCutTIP4PLongOMP::~PairLJCutTIP4PLongOMP() { memory->destroy(hneigh_thr); memory->destroy(newsite_thr); @@ -101,6 +101,7 @@ void PairLJCutTIP4PLongOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (!ncoultablebits) { @@ -125,6 +126,7 @@ void PairLJCutTIP4PLongOMP::compute(int eflag, int vflag) } else eval<0,0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_cut_tip4p_long_soft_omp.cpp b/src/USER-OMP/pair_lj_cut_tip4p_long_soft_omp.cpp index f31032788f..047b42775d 100644 --- a/src/USER-OMP/pair_lj_cut_tip4p_long_soft_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_tip4p_long_soft_omp.cpp @@ -101,6 +101,7 @@ void PairLJCutTIP4PLongSoftOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -113,6 +114,7 @@ void PairLJCutTIP4PLongSoftOMP::compute(int eflag, int vflag) } } else eval<0,0,0>(ifrom, ito, thr); + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_expand_omp.cpp b/src/USER-OMP/pair_lj_expand_omp.cpp index 8f1f931822..187cc5f78d 100644 --- a/src/USER-OMP/pair_lj_expand_omp.cpp +++ b/src/USER-OMP/pair_lj_expand_omp.cpp @@ -52,6 +52,7 @@ void PairLJExpandOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairLJExpandOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp index 61fb667a69..3e3fbbc9dd 100644 --- a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp +++ b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp @@ -52,6 +52,7 @@ void PairLJGromacsCoulGromacsOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairLJGromacsCoulGromacsOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_gromacs_omp.cpp b/src/USER-OMP/pair_lj_gromacs_omp.cpp index 964a9bda8d..618e20ab66 100644 --- a/src/USER-OMP/pair_lj_gromacs_omp.cpp +++ b/src/USER-OMP/pair_lj_gromacs_omp.cpp @@ -52,6 +52,7 @@ void PairLJGromacsOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairLJGromacsOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_long_coul_long_omp.cpp b/src/USER-OMP/pair_lj_long_coul_long_omp.cpp index aafe9a3ab2..23a2bc3cc4 100644 --- a/src/USER-OMP/pair_lj_long_coul_long_omp.cpp +++ b/src/USER-OMP/pair_lj_long_coul_long_omp.cpp @@ -64,6 +64,7 @@ void PairLJLongCoulLongOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (order6) { @@ -302,8 +303,9 @@ void PairLJLongCoulLongOMP::compute(int eflag, int vflag) } } } - } + } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -324,8 +326,10 @@ void PairLJLongCoulLongOMP::compute_inner() loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(0, 0, nall, 0, 0, thr); eval_inner(ifrom, ito, thr); + thr->timer(Timer::PAIR); } // end of omp parallel region } @@ -347,8 +351,10 @@ void PairLJLongCoulLongOMP::compute_middle() loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(0, 0, nall, 0, 0, thr); eval_middle(ifrom, ito, thr); + thr->timer(Timer::PAIR); } // end of omp parallel region } @@ -375,6 +381,7 @@ void PairLJLongCoulLongOMP::compute_outer(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (order6) { @@ -613,8 +620,9 @@ void PairLJLongCoulLongOMP::compute_outer(int eflag, int vflag) } } } - } + } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -979,7 +987,7 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr { double evdwl,ecoul,fvirial,fpair; evdwl = ecoul = 0.0; - + const double * const * const x = atom->x; double * const * const f = thr->get_f(); const double * const q = atom->q; @@ -993,7 +1001,7 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr double *f0 = f[0], *fi = f0; int *ilist = listouter->ilist; - + int i, j, ii; int *jneigh, *jneighn, typei, typej, ni, respa_flag; double qi = 0.0, qri = 0.0; @@ -1002,16 +1010,16 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2; double respa_lj = 0.0, respa_coul = 0.0, frespa = 0.0; vector xi, d; - + const double cut_in_off = cut_respa[2]; const double cut_in_on = cut_respa[3]; - + const double cut_in_diff = cut_in_on - cut_in_off; const double cut_in_off_sq = cut_in_off*cut_in_off; const double cut_in_on_sq = cut_in_on*cut_in_on; - + //ineighn = (ineigh = list->ilist)+list->inum; - + for (ii = iiform; ii < iito; ++ii) { // loop over my atoms i = ilist[ii]; fi = f0+3*i; if (ORDER1) qri = (qi = q[i])*qqrd2e; // initialize constants @@ -1020,20 +1028,20 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei]; memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i]; - + for (; jneigh<jneighn; ++jneigh) { // loop over neighbors j = *jneigh; ni = sbmask(j); j &= NEIGHMASK; - + { register const double *xj = x0+(j+(j<<1)); d[0] = xi[0] - xj[0]; // pair vector d[1] = xi[1] - xj[1]; d[2] = xi[2] - xj[2]; } - + if ((rsq = vec_dot(d, d)) >= cutsqi[typej = type[j]]) continue; r2inv = 1.0/rsq; - + frespa = 1.0; // check whether and how to compute respa corrections respa_coul = 0; respa_lj = 0; @@ -1042,7 +1050,7 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr register double rsw = (sqrt(rsq)-cut_in_off)/cut_in_diff; frespa = 1-rsw*rsw*(3.0-2.0*rsw); } - + if (ORDER1 && (rsq < cut_coulsq)) { // coulombic if (!CTABLE || rsq <= tabinnersq) { // series real space register double r = sqrt(rsq), s = qri*q[j]; @@ -1083,7 +1091,7 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr } } } - + else force_coul = respa_coul = ecoul = 0.0; if (rsq < cut_ljsqi[typej]) { // lennard-jones @@ -1139,9 +1147,9 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr } } else force_lj = respa_lj = evdwl = 0.0; - + fpair = (force_coul+force_lj)*r2inv; - + if (NEWTON_PAIR || j < nlocal) { register double *fj = f0+(j+(j<<1)), f; fi[0] += f = d[0]*fpair; fj[0] -= f; @@ -1153,7 +1161,7 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr fi[1] += d[1]*fpair; fi[2] += d[2]*fpair; } - + if (EVFLAG) { fvirial = (force_coul + force_lj + respa_coul + respa_lj)*r2inv; ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, diff --git a/src/USER-OMP/pair_lj_long_coul_long_omp.h b/src/USER-OMP/pair_lj_long_coul_long_omp.h index f10cbfd510..dd47231a9c 100644 --- a/src/USER-OMP/pair_lj_long_coul_long_omp.h +++ b/src/USER-OMP/pair_lj_long_coul_long_omp.h @@ -55,7 +55,7 @@ class PairLJLongCoulLongOMP : public PairLJLongCoulLong, public ThrOMP { void eval_inner(int, int, ThrData *const); void eval_middle(int, int, ThrData *const); - + }; diff --git a/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp b/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp index cace817fe5..4c86ce7d33 100644 --- a/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp +++ b/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp @@ -104,6 +104,7 @@ void PairLJLongTIP4PLongOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (order6) { @@ -342,8 +343,9 @@ void PairLJLongTIP4PLongOMP::compute(int eflag, int vflag) } } } - } + } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -385,8 +387,10 @@ void PairLJLongTIP4PLongOMP::compute_inner() loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(0, 0, nall, 0, 0, thr); eval_inner(ifrom, ito, thr); + thr->timer(Timer::PAIR); } // end of omp parallel region } @@ -408,8 +412,10 @@ void PairLJLongTIP4PLongOMP::compute_middle() loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(0, 0, nall, 0, 0, thr); eval_middle(ifrom, ito, thr); + thr->timer(Timer::PAIR); } // end of omp parallel region } @@ -460,6 +466,7 @@ void PairLJLongTIP4PLongOMP::compute_outer(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (order6) { @@ -698,8 +705,9 @@ void PairLJLongTIP4PLongOMP::compute_outer(int eflag, int vflag) } } } - } + } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_long_tip4p_long_omp.h b/src/USER-OMP/pair_lj_long_tip4p_long_omp.h index af0e3053b4..171edb6ddd 100644 --- a/src/USER-OMP/pair_lj_long_tip4p_long_omp.h +++ b/src/USER-OMP/pair_lj_long_tip4p_long_omp.h @@ -62,7 +62,7 @@ class PairLJLongTIP4PLongOMP : public PairLJLongTIP4PLong, public ThrOMP { void compute_newsite_thr(const dbl3_t &, const dbl3_t &, const dbl3_t &, dbl3_t &) const; - + }; diff --git a/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp b/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp index 2c85f0f2e0..c86a1f6c0e 100644 --- a/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp +++ b/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp @@ -54,6 +54,7 @@ void PairLJSDKCoulLongOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -69,6 +70,7 @@ void PairLJSDKCoulLongOMP::compute(int eflag, int vflag) else eval_thr<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp b/src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp index 62d1ae56fd..95013932e8 100644 --- a/src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp +++ b/src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp @@ -60,6 +60,7 @@ void PairLJSDKCoulMSMOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -75,6 +76,7 @@ void PairLJSDKCoulMSMOMP::compute(int eflag, int vflag) else eval_msm_thr<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_sdk_omp.cpp b/src/USER-OMP/pair_lj_sdk_omp.cpp index da2eb492ff..ca32528ee0 100644 --- a/src/USER-OMP/pair_lj_sdk_omp.cpp +++ b/src/USER-OMP/pair_lj_sdk_omp.cpp @@ -56,6 +56,7 @@ void PairLJSDKOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -71,6 +72,7 @@ void PairLJSDKOMP::compute(int eflag, int vflag) else eval_thr<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp b/src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp index dbaf1a9639..572c775467 100755 --- a/src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp +++ b/src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp @@ -52,6 +52,7 @@ void PairLJSFDipoleSFOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairLJSFDipoleSFOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_sf_omp.cpp b/src/USER-OMP/pair_lj_sf_omp.cpp index 82595c7578..c1bdbcc93e 100644 --- a/src/USER-OMP/pair_lj_sf_omp.cpp +++ b/src/USER-OMP/pair_lj_sf_omp.cpp @@ -52,6 +52,7 @@ void PairLJShiftedForceOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairLJShiftedForceOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_smooth_linear_omp.cpp b/src/USER-OMP/pair_lj_smooth_linear_omp.cpp index 7a8440fa58..6ad61b0dbf 100644 --- a/src/USER-OMP/pair_lj_smooth_linear_omp.cpp +++ b/src/USER-OMP/pair_lj_smooth_linear_omp.cpp @@ -52,6 +52,7 @@ void PairLJSmoothLinearOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairLJSmoothLinearOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lj_smooth_omp.cpp b/src/USER-OMP/pair_lj_smooth_omp.cpp index b38ff2311b..d2eee11102 100644 --- a/src/USER-OMP/pair_lj_smooth_omp.cpp +++ b/src/USER-OMP/pair_lj_smooth_omp.cpp @@ -52,6 +52,7 @@ void PairLJSmoothOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairLJSmoothOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lubricate_omp.cpp b/src/USER-OMP/pair_lubricate_omp.cpp index 877be4bed2..3da80c8c5a 100644 --- a/src/USER-OMP/pair_lubricate_omp.cpp +++ b/src/USER-OMP/pair_lubricate_omp.cpp @@ -122,6 +122,7 @@ void PairLubricateOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (flaglog) { @@ -142,6 +143,7 @@ void PairLubricateOMP::compute(int eflag, int vflag) } } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_lubricate_poly_omp.cpp b/src/USER-OMP/pair_lubricate_poly_omp.cpp index a9161805b6..abe27b7434 100644 --- a/src/USER-OMP/pair_lubricate_poly_omp.cpp +++ b/src/USER-OMP/pair_lubricate_poly_omp.cpp @@ -120,6 +120,7 @@ void PairLubricatePolyOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (flaglog) { @@ -144,6 +145,7 @@ void PairLubricatePolyOMP::compute(int eflag, int vflag) } } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_meam_spline_omp.cpp b/src/USER-OMP/pair_meam_spline_omp.cpp index 69eeb1e120..ebe576e59b 100644 --- a/src/USER-OMP/pair_meam_spline_omp.cpp +++ b/src/USER-OMP/pair_meam_spline_omp.cpp @@ -66,6 +66,7 @@ void PairMEAMSplineOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); thr->init_eam(nall,Uprime_values); @@ -80,6 +81,7 @@ void PairMEAMSplineOMP::compute(int eflag, int vflag) eval<0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -250,6 +252,7 @@ void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr) sync_threads(); // reduce per thread density + thr->timer(Timer::PAIR); data_reduce_thr(Uprime_values, nall, nthreads, 1, tid); // wait until reduction is complete so that master thread diff --git a/src/USER-OMP/pair_morse_omp.cpp b/src/USER-OMP/pair_morse_omp.cpp index 448b265de7..b27829d897 100644 --- a/src/USER-OMP/pair_morse_omp.cpp +++ b/src/USER-OMP/pair_morse_omp.cpp @@ -52,6 +52,7 @@ void PairMorseOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairMorseOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_nb3b_harmonic_omp.cpp b/src/USER-OMP/pair_nb3b_harmonic_omp.cpp index 1877b26fbc..b36c4dd77d 100644 --- a/src/USER-OMP/pair_nb3b_harmonic_omp.cpp +++ b/src/USER-OMP/pair_nb3b_harmonic_omp.cpp @@ -52,6 +52,7 @@ void PairNb3bHarmonicOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -62,6 +63,7 @@ void PairNb3bHarmonicOMP::compute(int eflag, int vflag) } } else eval<0,0>(ifrom, ito, thr); + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp b/src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp index b3fafc1693..f4fa780c60 100644 --- a/src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp +++ b/src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp @@ -52,6 +52,7 @@ void PairNMCutCoulCutOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairNMCutCoulCutOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_nm_cut_coul_long_omp.cpp b/src/USER-OMP/pair_nm_cut_coul_long_omp.cpp index b198693680..c5e7ebd621 100644 --- a/src/USER-OMP/pair_nm_cut_coul_long_omp.cpp +++ b/src/USER-OMP/pair_nm_cut_coul_long_omp.cpp @@ -60,6 +60,7 @@ void PairNMCutCoulLongOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -75,6 +76,7 @@ void PairNMCutCoulLongOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_nm_cut_omp.cpp b/src/USER-OMP/pair_nm_cut_omp.cpp index 27f1d46931..a988633acf 100644 --- a/src/USER-OMP/pair_nm_cut_omp.cpp +++ b/src/USER-OMP/pair_nm_cut_omp.cpp @@ -52,6 +52,7 @@ void PairNMCutOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -66,6 +67,7 @@ void PairNMCutOMP::compute(int eflag, int vflag) if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_peri_lps_omp.cpp b/src/USER-OMP/pair_peri_lps_omp.cpp index 640369ac03..92b7037127 100644 --- a/src/USER-OMP/pair_peri_lps_omp.cpp +++ b/src/USER-OMP/pair_peri_lps_omp.cpp @@ -71,6 +71,7 @@ void PairPeriLPSOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -86,6 +87,7 @@ void PairPeriLPSOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_peri_pmb_omp.cpp b/src/USER-OMP/pair_peri_pmb_omp.cpp index 86d3cc20f2..8895b3ba6c 100644 --- a/src/USER-OMP/pair_peri_pmb_omp.cpp +++ b/src/USER-OMP/pair_peri_pmb_omp.cpp @@ -67,6 +67,7 @@ void PairPeriPMBOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -82,6 +83,7 @@ void PairPeriPMBOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_resquared_omp.cpp b/src/USER-OMP/pair_resquared_omp.cpp index 4e3bfded47..c38070304d 100644 --- a/src/USER-OMP/pair_resquared_omp.cpp +++ b/src/USER-OMP/pair_resquared_omp.cpp @@ -54,6 +54,7 @@ void PairRESquaredOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -69,6 +70,7 @@ void PairRESquaredOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_soft_omp.cpp b/src/USER-OMP/pair_soft_omp.cpp index f262e1d411..51cac9106b 100644 --- a/src/USER-OMP/pair_soft_omp.cpp +++ b/src/USER-OMP/pair_soft_omp.cpp @@ -56,6 +56,7 @@ void PairSoftOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -71,6 +72,7 @@ void PairSoftOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_sw_omp.cpp b/src/USER-OMP/pair_sw_omp.cpp index 1506359a7a..c90424e735 100644 --- a/src/USER-OMP/pair_sw_omp.cpp +++ b/src/USER-OMP/pair_sw_omp.cpp @@ -52,6 +52,7 @@ void PairSWOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -62,6 +63,7 @@ void PairSWOMP::compute(int eflag, int vflag) } } else eval<0,0>(ifrom, ito, thr); + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_table_omp.cpp b/src/USER-OMP/pair_table_omp.cpp index 9e1340b300..f8c6a9f186 100644 --- a/src/USER-OMP/pair_table_omp.cpp +++ b/src/USER-OMP/pair_table_omp.cpp @@ -53,6 +53,7 @@ void PairTableOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -68,6 +69,7 @@ void PairTableOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_tersoff_mod_omp.cpp b/src/USER-OMP/pair_tersoff_mod_omp.cpp index 02e4962567..907bb9e078 100644 --- a/src/USER-OMP/pair_tersoff_mod_omp.cpp +++ b/src/USER-OMP/pair_tersoff_mod_omp.cpp @@ -52,6 +52,7 @@ void PairTersoffMODOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -64,6 +65,7 @@ void PairTersoffMODOMP::compute(int eflag, int vflag) } } else eval<0,0,0>(ifrom, ito, thr); + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_tersoff_omp.cpp b/src/USER-OMP/pair_tersoff_omp.cpp index 8d89caa5e3..ff0f41b282 100644 --- a/src/USER-OMP/pair_tersoff_omp.cpp +++ b/src/USER-OMP/pair_tersoff_omp.cpp @@ -52,6 +52,7 @@ void PairTersoffOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -64,6 +65,7 @@ void PairTersoffOMP::compute(int eflag, int vflag) } } else eval<0,0,0>(ifrom, ito, thr); + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_tersoff_table_omp.cpp b/src/USER-OMP/pair_tersoff_table_omp.cpp index 86d8cdecc0..bd786c7ca9 100644 --- a/src/USER-OMP/pair_tersoff_table_omp.cpp +++ b/src/USER-OMP/pair_tersoff_table_omp.cpp @@ -77,6 +77,7 @@ void PairTersoffTableOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) @@ -84,6 +85,7 @@ void PairTersoffTableOMP::compute(int eflag, int vflag) else eval<1,0>(ifrom, ito, thr); else eval<0,0>(ifrom, ito, thr); + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_tip4p_cut_omp.cpp b/src/USER-OMP/pair_tip4p_cut_omp.cpp index d9f75cba9a..72c582acb8 100644 --- a/src/USER-OMP/pair_tip4p_cut_omp.cpp +++ b/src/USER-OMP/pair_tip4p_cut_omp.cpp @@ -52,7 +52,7 @@ PairTIP4PCutOMP::PairTIP4PCutOMP(LAMMPS *lmp) : /* ---------------------------------------------------------------------- */ -PairTIP4PCutOMP::~PairTIP4PCutOMP() +PairTIP4PCutOMP::~PairTIP4PCutOMP() { memory->destroy(hneigh_thr); memory->destroy(newsite_thr); @@ -100,6 +100,7 @@ void PairTIP4PCutOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -112,6 +113,7 @@ void PairTIP4PCutOMP::compute(int eflag, int vflag) } } else eval<0,0,0>(ifrom, ito, thr); + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_tip4p_long_omp.cpp b/src/USER-OMP/pair_tip4p_long_omp.cpp index fee669e53b..3476ed5928 100644 --- a/src/USER-OMP/pair_tip4p_long_omp.cpp +++ b/src/USER-OMP/pair_tip4p_long_omp.cpp @@ -52,7 +52,7 @@ PairTIP4PLongOMP::PairTIP4PLongOMP(LAMMPS *lmp) : /* ---------------------------------------------------------------------- */ -PairTIP4PLongOMP::~PairTIP4PLongOMP() +PairTIP4PLongOMP::~PairTIP4PLongOMP() { memory->destroy(hneigh_thr); memory->destroy(newsite_thr); @@ -101,6 +101,7 @@ void PairTIP4PLongOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (!ncoultablebits) { @@ -125,6 +126,7 @@ void PairTIP4PLongOMP::compute(int eflag, int vflag) } else eval<0,0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_tip4p_long_soft_omp.cpp b/src/USER-OMP/pair_tip4p_long_soft_omp.cpp index 6d76e638b9..f693e0a06a 100644 --- a/src/USER-OMP/pair_tip4p_long_soft_omp.cpp +++ b/src/USER-OMP/pair_tip4p_long_soft_omp.cpp @@ -101,6 +101,7 @@ void PairTIP4PLongSoftOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -113,6 +114,7 @@ void PairTIP4PLongSoftOMP::compute(int eflag, int vflag) } } else eval<0,0,0>(ifrom, ito, thr); + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_tri_lj_omp.cpp b/src/USER-OMP/pair_tri_lj_omp.cpp index 16dce231ba..3e1e1fe93f 100644 --- a/src/USER-OMP/pair_tri_lj_omp.cpp +++ b/src/USER-OMP/pair_tri_lj_omp.cpp @@ -93,6 +93,7 @@ void PairTriLJOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -108,6 +109,7 @@ void PairTriLJOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_yukawa_colloid_omp.cpp b/src/USER-OMP/pair_yukawa_colloid_omp.cpp index 36b5d82c0b..c3c73fab58 100644 --- a/src/USER-OMP/pair_yukawa_colloid_omp.cpp +++ b/src/USER-OMP/pair_yukawa_colloid_omp.cpp @@ -52,6 +52,7 @@ void PairYukawaColloidOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairYukawaColloidOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_yukawa_omp.cpp b/src/USER-OMP/pair_yukawa_omp.cpp index ba2345983c..9bb0dea9c3 100644 --- a/src/USER-OMP/pair_yukawa_omp.cpp +++ b/src/USER-OMP/pair_yukawa_omp.cpp @@ -52,6 +52,7 @@ void PairYukawaOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairYukawaOMP::compute(int eflag, int vflag) else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pair_zbl_omp.cpp b/src/USER-OMP/pair_zbl_omp.cpp index 454934f679..281ee52acb 100644 --- a/src/USER-OMP/pair_zbl_omp.cpp +++ b/src/USER-OMP/pair_zbl_omp.cpp @@ -53,6 +53,7 @@ void PairZBLOMP::compute(int eflag, int vflag) loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) { @@ -67,6 +68,7 @@ void PairZBLOMP::compute(int eflag, int vflag) if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); else eval<0,0,0>(ifrom, ito, thr); } + thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } diff --git a/src/USER-OMP/pppm_cg_omp.cpp b/src/USER-OMP/pppm_cg_omp.cpp index 3a0c3f5806..021765d14b 100644 --- a/src/USER-OMP/pppm_cg_omp.cpp +++ b/src/USER-OMP/pppm_cg_omp.cpp @@ -138,6 +138,8 @@ void PPPMCGOMP::compute_gf_ik() int k,l,m,nx,ny,nz,kper,lper,mper,n,nfrom,nto,tid; loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads); + ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); for (n = nfrom; n < nto; ++n) { m = n / (numl*numk); @@ -190,6 +192,7 @@ void PPPMCGOMP::compute_gf_ik() greensfn[n] = numerator*sum1/denominator; } else greensfn[n] = 0.0; } + thr->timer(Timer::KSPACE); } // end of parallel region } @@ -226,6 +229,8 @@ void PPPMCGOMP::compute_gf_ad() int k,l,m,kper,lper,mper,n,nfrom,nto,tid; loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads); + ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); for (n = nfrom; n < nto; ++n) { @@ -279,8 +284,9 @@ void PPPMCGOMP::compute_gf_ad() sf5 += sf_precoeff6[n]*greensfn[n]; } } + thr->timer(Timer::KSPACE); } // end of paralle region - + // compute the coefficients for the self-force correction double prex, prey, prez, tmp[6]; @@ -321,6 +327,7 @@ void PPPMCGOMP::compute(int eflag, int vflag) const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -365,6 +372,7 @@ void PPPMCGOMP::make_rho() // get per thread data ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); // loop over my charges, add their contribution to nearby grid points @@ -380,7 +388,7 @@ void PPPMCGOMP::make_rho() const int ny = p2g[i].b; const int nz = p2g[i].t; - // pre-screen whether this atom will ever come within + // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. if ( ((nz+nlower-nzlo_out)*ix*iy >= jto) || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; @@ -413,6 +421,7 @@ void PPPMCGOMP::make_rho() } } } + thr->timer(Timer::KSPACE); } } @@ -448,6 +457,7 @@ void PPPMCGOMP::fieldforce_ik() // get per thread data ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); @@ -487,6 +497,7 @@ void PPPMCGOMP::fieldforce_ik() f[i].y += qfactor*eky; if (slabflag != 2) f[i].z += qfactor*ekz; } + thr->timer(Timer::KSPACE); } // end of parallel region } @@ -528,6 +539,7 @@ void PPPMCGOMP::fieldforce_ad() // get per thread data ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); FFT_SCALAR * const * const d1d = static_cast<FFT_SCALAR **>(thr->get_drho1d()); @@ -585,6 +597,7 @@ void PPPMCGOMP::fieldforce_ad() sf *= 2*qi; if (slabflag != 2) f[i].z += qfactor*(ekz - sf); } + thr->timer(Timer::KSPACE); } // end of parallel region } @@ -619,6 +632,7 @@ void PPPMCGOMP::fieldforce_peratom() // get per thread data ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); for (int j=ifrom; j < ito; ++j) { @@ -667,6 +681,7 @@ void PPPMCGOMP::fieldforce_peratom() vatom[i][5] += qi*v5; } } + thr->timer(Timer::KSPACE); } // end of parallel region } diff --git a/src/USER-OMP/pppm_disp_omp.cpp b/src/USER-OMP/pppm_disp_omp.cpp index b22b553341..f3692b287a 100644 --- a/src/USER-OMP/pppm_disp_omp.cpp +++ b/src/USER-OMP/pppm_disp_omp.cpp @@ -74,7 +74,7 @@ void PPPMDispOMP::allocate() } if (function[1] + function[2]) { ThrData * thr = fix->get_thr(tid); - thr->init_pppm_disp(order_6,memory); + thr->init_pppm_disp(order_6,memory); } } } @@ -142,6 +142,8 @@ void PPPMDispOMP::compute_gf() const int nny = nyhi_fft-nylo_fft+1; loop_setup_thr(nnfrom, nnto, tid, nfft, comm->nthreads); + ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); for (m = nzlo_fft; m <= nzhi_fft; m++) { mper = m - nz_pppm*(2*m/nz_pppm); @@ -185,13 +187,14 @@ void PPPMDispOMP::compute_gf() if (sqk != 0.0) { numerator = 4.0*MY_PI/sqk; - denominator = gf_denom(snx2,sny2,snz2, gf_b, order); + denominator = gf_denom(snx2,sny2,snz2, gf_b, order); greensfn[nn] = numerator*sx*sy*sz*wx*wy*wz/denominator; } else greensfn[nn] = 0.0; } } } - } + thr->timer(Timer::KSPACE); + } // end of parallel region } /* ---------------------------------------------------------------------- @@ -241,6 +244,8 @@ void PPPMDispOMP::compute_gf_6() const int nny = nyhi_fft_6-nylo_fft_6+1; loop_setup_thr(nnfrom, nnto, tid, nfft_6, comm->nthreads); + ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { mper = m - nz_pppm_6*(2*m/nz_pppm_6); @@ -252,7 +257,7 @@ void PPPMDispOMP::compute_gf_6() argz = 0.5*qz*zprd_slab/nz_pppm_6; if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); wz *= wz; - + for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { lper = l - ny_pppm_6*(2*l/ny_pppm_6); qy = unitky*lper; @@ -279,11 +284,11 @@ void PPPMDispOMP::compute_gf_6() argx = 0.5*qx*xprd/nx_pppm_6; if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); wx *= wx; - + sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); if (sqk != 0.0) { - denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6); + denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6); rtsqk = sqrt(sqk); term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz + 2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew); @@ -292,7 +297,8 @@ void PPPMDispOMP::compute_gf_6() } } } - } + thr->timer(Timer::KSPACE); + } // end of parallel region } /* ---------------------------------------------------------------------- run the regular toplevel compute method from plain PPPPM @@ -315,6 +321,7 @@ void PPPMDispOMP::compute(int eflag, int vflag) #endif ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -429,6 +436,7 @@ void PPPMDispOMP::make_rho_c() // get per thread data ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); // loop over my charges, add their contribution to nearby grid points @@ -442,7 +450,7 @@ void PPPMDispOMP::make_rho_c() const int ny = p2g[i].b; const int nz = p2g[i].t; - // pre-screen whether this atom will ever come within + // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. if ( ((nz+nlower-nzlo_out)*ix*iy >= jto) || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; @@ -475,7 +483,8 @@ void PPPMDispOMP::make_rho_c() } } } - } + thr->timer(Timer::KSPACE); + } // end of parallel region } @@ -516,6 +525,7 @@ void PPPMDispOMP::make_rho_g() // get per thread data ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6()); // loop over my charges, add their contribution to nearby grid points @@ -529,7 +539,7 @@ void PPPMDispOMP::make_rho_g() const int ny = p2g[i].b; const int nz = p2g[i].t; - // pre-screen whether this atom will ever come within + // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto) || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue; @@ -564,7 +574,8 @@ void PPPMDispOMP::make_rho_g() } } } - } + thr->timer(Timer::KSPACE); + } // end of parallel region } @@ -618,6 +629,7 @@ void PPPMDispOMP::make_rho_a() // get per thread data ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6()); // loop over my charges, add their contribution to nearby grid points @@ -631,7 +643,7 @@ void PPPMDispOMP::make_rho_a() const int ny = p2g[i].b; const int nz = p2g[i].t; - // pre-screen whether this atom will ever come within + // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto) || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue; @@ -681,7 +693,8 @@ void PPPMDispOMP::make_rho_a() } } } - } + thr->timer(Timer::KSPACE); + } // end of parallel region } @@ -726,6 +739,7 @@ void PPPMDispOMP::fieldforce_c_ik() const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); double * const * const f = thr->get_f(); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); @@ -770,7 +784,8 @@ void PPPMDispOMP::fieldforce_c_ik() f[i][2] += qfactor*ekz; } } - } + thr->timer(Timer::KSPACE); + } // end of parallel region } /* ---------------------------------------------------------------------- @@ -829,6 +844,7 @@ void PPPMDispOMP::fieldforce_c_ad() const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); double * const * const f = thr->get_f(); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); FFT_SCALAR * const * const dr1d = static_cast<FFT_SCALAR **>(thr->get_drho1d()); @@ -893,7 +909,8 @@ void PPPMDispOMP::fieldforce_c_ad() if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); } } - } + thr->timer(Timer::KSPACE); + } // end of parallel region } /* ---------------------------------------------------------------------- @@ -934,6 +951,7 @@ void PPPMDispOMP::fieldforce_c_peratom() const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); int l,m,n,nx,ny,nz,mx,my,mz; @@ -989,7 +1007,8 @@ void PPPMDispOMP::fieldforce_c_peratom() } } } - } + thr->timer(Timer::KSPACE); + } // end of parallel region } /* ---------------------------------------------------------------------- @@ -1031,7 +1050,8 @@ void PPPMDispOMP::fieldforce_g_ik() const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); - double * const * const f = thr->get_f(); + thr->timer(Timer::START); + double * const * const f = thr->get_f(); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6()); int l,m,n,nx,ny,nz,mx,my,mz; @@ -1078,7 +1098,8 @@ void PPPMDispOMP::fieldforce_g_ik() f[i][2] += lj*ekz; } } - } + thr->timer(Timer::KSPACE); + } // end of parallel region } /* ---------------------------------------------------------------------- @@ -1133,6 +1154,7 @@ void PPPMDispOMP::fieldforce_g_ad() const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); double * const * const f = thr->get_f(); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6()); FFT_SCALAR * const * const dr1d = static_cast<FFT_SCALAR **>(thr->get_drho1d_6()); @@ -1200,7 +1222,8 @@ void PPPMDispOMP::fieldforce_g_ad() if (slabflag != 2) f[i][2] += ekz*lj - sf; } } - } + thr->timer(Timer::KSPACE); + } // end of parallel region } /* ---------------------------------------------------------------------- @@ -1241,6 +1264,7 @@ void PPPMDispOMP::fieldforce_g_peratom() const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6()); int l,m,n,nx,ny,nz,mx,my,mz; @@ -1299,7 +1323,8 @@ void PPPMDispOMP::fieldforce_g_peratom() } } } - } + thr->timer(Timer::KSPACE); + } // end of parallel region } /* ---------------------------------------------------------------------- @@ -1341,6 +1366,7 @@ void PPPMDispOMP::fieldforce_a_ik() const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); double * const * const f = thr->get_f(); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6()); @@ -1420,7 +1446,8 @@ void PPPMDispOMP::fieldforce_a_ik() f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6; } } - } + thr->timer(Timer::KSPACE); + } // end of parallel region } /* ---------------------------------------------------------------------- @@ -1475,6 +1502,7 @@ void PPPMDispOMP::fieldforce_a_ad() const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); double * const * const f = thr->get_f(); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6()); FFT_SCALAR * const * const dr1d = static_cast<FFT_SCALAR **>(thr->get_drho1d_6()); @@ -1609,7 +1637,8 @@ void PPPMDispOMP::fieldforce_a_ad() if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf; } } - } + thr->timer(Timer::KSPACE); + } // end of parallel region } /* ---------------------------------------------------------------------- @@ -1650,6 +1679,7 @@ void PPPMDispOMP::fieldforce_a_peratom() const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6()); int l,m,n,nx,ny,nz,mx,my,mz; @@ -1761,25 +1791,26 @@ void PPPMDispOMP::fieldforce_a_peratom() lj6 = B[7*type]*0.5; if (eflag_atom) - eatom[i] += u0*lj0 + u1*lj1 + u2*lj2 + + eatom[i] += u0*lj0 + u1*lj1 + u2*lj2 + u3*lj3 + u4*lj4 + u5*lj5 + u6*lj6; if (vflag_atom) { - vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 + + vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 + v04*lj4 + v05*lj5 + v06*lj6; - vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 + + vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 + v14*lj4 + v15*lj5 + v16*lj6; - vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 + + vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 + v24*lj4 + v25*lj5 + v26*lj6; - vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 + + vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 + v34*lj4 + v35*lj5 + v36*lj6; - vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 + + vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 + v44*lj4 + v45*lj5 + v46*lj6; - vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 + + vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 + v54*lj4 + v55*lj5 + v56*lj6; } } } - } + thr->timer(Timer::KSPACE); + } // end of parallel region } /* ---------------------------------------------------------------------- diff --git a/src/USER-OMP/pppm_disp_omp.h b/src/USER-OMP/pppm_disp_omp.h index e2f588d169..060f269471 100644 --- a/src/USER-OMP/pppm_disp_omp.h +++ b/src/USER-OMP/pppm_disp_omp.h @@ -41,7 +41,7 @@ namespace LAMMPS_NS { virtual void particle_map(double,double,double, double,int**,int,int, int,int,int,int,int,int); - + virtual void fieldforce_c_ik(); virtual void fieldforce_c_ad(); diff --git a/src/USER-OMP/pppm_omp.cpp b/src/USER-OMP/pppm_omp.cpp index bcda74e92a..a62199be56 100644 --- a/src/USER-OMP/pppm_omp.cpp +++ b/src/USER-OMP/pppm_omp.cpp @@ -135,6 +135,8 @@ void PPPMOMP::compute_gf_ik() int k,l,m,nx,ny,nz,kper,lper,mper,n,nfrom,nto,tid; loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads); + ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); for (n = nfrom; n < nto; ++n) { m = n / (numl*numk); @@ -187,6 +189,7 @@ void PPPMOMP::compute_gf_ik() greensfn[n] = numerator*sum1/denominator; } else greensfn[n] = 0.0; } + thr->timer(Timer::KSPACE); } // end of parallel region } @@ -223,6 +226,8 @@ void PPPMOMP::compute_gf_ad() int k,l,m,kper,lper,mper,n,nfrom,nto,tid; loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads); + ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); for (n = nfrom; n < nto; ++n) { @@ -276,8 +281,9 @@ void PPPMOMP::compute_gf_ad() sf5 += sf_precoeff6[n]*greensfn[n]; } } + thr->timer(Timer::KSPACE); } // end of paralle region - + // compute the coefficients for the self-force correction double prex, prey, prez, tmp[6]; @@ -318,6 +324,7 @@ void PPPMOMP::compute(int eflag, int vflag) const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -363,6 +370,7 @@ void PPPMOMP::make_rho() // get per thread data ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); // loop over my charges, add their contribution to nearby grid points @@ -376,7 +384,7 @@ void PPPMOMP::make_rho() const int ny = p2g[i].b; const int nz = p2g[i].t; - // pre-screen whether this atom will ever come within + // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. if ( ((nz+nlower-nzlo_out)*ix*iy >= jto) || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; @@ -409,6 +417,7 @@ void PPPMOMP::make_rho() } } } + thr->timer(Timer::KSPACE); } } @@ -451,6 +460,7 @@ void PPPMOMP::fieldforce_ik() // get per thread data ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); @@ -488,6 +498,7 @@ void PPPMOMP::fieldforce_ik() f[i].y += qfactor*eky; if (slabflag != 2) f[i].z += qfactor*ekz; } + thr->timer(Timer::KSPACE); } // end of parallel region } @@ -535,6 +546,7 @@ void PPPMOMP::fieldforce_ad() // get per thread data ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); FFT_SCALAR * const * const d1d = static_cast<FFT_SCALAR **>(thr->get_drho1d()); @@ -590,6 +602,7 @@ void PPPMOMP::fieldforce_ad() sf *= 2.0*qi; if (slabflag != 2) f[i].z += qfactor*(ekz - sf); } + thr->timer(Timer::KSPACE); } // end of parallel region } @@ -626,6 +639,7 @@ void PPPMOMP::fieldforce_peratom() // get per thread data ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); for (i = ifrom; i < ito; ++i) { @@ -672,6 +686,7 @@ void PPPMOMP::fieldforce_peratom() vatom[i][5] += qi*v5; } } + thr->timer(Timer::KSPACE); } // end of parallel region } diff --git a/src/USER-OMP/pppm_tip4p_omp.cpp b/src/USER-OMP/pppm_tip4p_omp.cpp index 9e6efb3512..b2e344036e 100644 --- a/src/USER-OMP/pppm_tip4p_omp.cpp +++ b/src/USER-OMP/pppm_tip4p_omp.cpp @@ -136,6 +136,8 @@ void PPPMTIP4POMP::compute_gf_ik() int k,l,m,nx,ny,nz,kper,lper,mper,n,nfrom,nto,tid; loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads); + ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); for (n = nfrom; n < nto; ++n) { m = n / (numl*numk); @@ -188,6 +190,7 @@ void PPPMTIP4POMP::compute_gf_ik() greensfn[n] = numerator*sum1/denominator; } else greensfn[n] = 0.0; } + thr->timer(Timer::KSPACE); } // end of parallel region } @@ -224,6 +227,8 @@ void PPPMTIP4POMP::compute_gf_ad() int k,l,m,kper,lper,mper,n,nfrom,nto,tid; loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads); + ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); for (n = nfrom; n < nto; ++n) { @@ -277,8 +282,9 @@ void PPPMTIP4POMP::compute_gf_ad() sf5 += sf_precoeff6[n]*greensfn[n]; } } + thr->timer(Timer::KSPACE); } // end of paralle region - + // compute the coefficients for the self-force correction double prex, prey, prez, tmp[6]; @@ -319,6 +325,7 @@ void PPPMTIP4POMP::compute(int eflag, int vflag) const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } @@ -428,6 +435,7 @@ void PPPMTIP4POMP::make_rho() // get per thread data ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); // loop over my charges, add their contribution to nearby grid points @@ -441,7 +449,7 @@ void PPPMTIP4POMP::make_rho() const int ny = p2g[i].b; const int nz = p2g[i].t; - // pre-screen whether this atom will ever come within + // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. if ( ((nz+nlower-nzlo_out)*ix*iy >= jto) || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; @@ -479,6 +487,7 @@ void PPPMTIP4POMP::make_rho() } } } + thr->timer(Timer::KSPACE); } } @@ -523,6 +532,7 @@ void PPPMTIP4POMP::fieldforce_ik() // get per thread data ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); @@ -583,6 +593,7 @@ void PPPMTIP4POMP::fieldforce_ik() if (slabflag != 2) f[iH2].z += 0.5*alpha*fz; } } + thr->timer(Timer::KSPACE); } // end of parallel region } @@ -633,6 +644,7 @@ void PPPMTIP4POMP::fieldforce_ad() // get per thread data ThrData *thr = fix->get_thr(tid); + thr->timer(Timer::START); dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); FFT_SCALAR * const * const d1d = static_cast<FFT_SCALAR **>(thr->get_drho1d()); @@ -711,6 +723,7 @@ void PPPMTIP4POMP::fieldforce_ad() if (slabflag != 2) f[iH2].z += 0.5*alpha*fz; } } + thr->timer(Timer::KSPACE); } // end of parallel region } diff --git a/src/USER-OMP/respa_omp.cpp b/src/USER-OMP/respa_omp.cpp index b044e94ace..ed08f019fb 100644 --- a/src/USER-OMP/respa_omp.cpp +++ b/src/USER-OMP/respa_omp.cpp @@ -46,7 +46,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -RespaOMP::RespaOMP(LAMMPS *lmp, int narg, char **arg) +RespaOMP::RespaOMP(LAMMPS *lmp, int narg, char **arg) : Respa(lmp, narg, arg),ThrOMP(lmp, THR_INTGR) { } @@ -69,7 +69,12 @@ void RespaOMP::init() void RespaOMP::setup() { - if (comm->me == 0 && screen) fprintf(screen,"Setting up run ...\n"); + if (comm->me == 0 && screen) { + fprintf(screen,"Setting up r-RESPA/omp run ...\n"); + fprintf(screen," Unit style : %s\n", update->unit_style); + fprintf(screen," Current step : " BIGINT_FORMAT "\n", update->ntimestep); + fprintf(screen," OuterTime step: %g\n", update->dt); + } update->setupflag = 1; @@ -101,14 +106,11 @@ void RespaOMP::setup() for (int ilevel = 0; ilevel < nlevels; ilevel++) { force_clear(newton[ilevel]); modify->setup_pre_force_respa(vflag,ilevel); - if (level_bond == ilevel && force->bond) - force->bond->compute(eflag,vflag); - if (level_angle == ilevel && force->angle) - force->angle->compute(eflag,vflag); - if (level_dihedral == ilevel && force->dihedral) - force->dihedral->compute(eflag,vflag); - if (level_improper == ilevel && force->improper) - force->improper->compute(eflag,vflag); + + if (nhybrid_styles > 0) { + set_compute_flags(ilevel); + force->pair->compute(eflag,vflag); + } if (level_pair == ilevel && pair_compute_flag) force->pair->compute(eflag,vflag); if (level_inner == ilevel && pair_compute_flag) @@ -117,6 +119,14 @@ void RespaOMP::setup() force->pair->compute_middle(); if (level_outer == ilevel && pair_compute_flag) force->pair->compute_outer(eflag,vflag); + if (level_bond == ilevel && force->bond) + force->bond->compute(eflag,vflag); + if (level_angle == ilevel && force->angle) + force->angle->compute(eflag,vflag); + if (level_dihedral == ilevel && force->dihedral) + force->dihedral->compute(eflag,vflag); + if (level_improper == ilevel && force->improper) + force->improper->compute(eflag,vflag); if (level_kspace == ilevel && force->kspace) { force->kspace->setup(); if (kspace_compute_flag) force->kspace->compute(eflag,vflag); @@ -139,7 +149,7 @@ void RespaOMP::setup() } fix->did_reduce(); } - + if (newton[ilevel]) comm->reverse_comm(); copy_f_flevel(ilevel); } @@ -188,14 +198,12 @@ void RespaOMP::setup_minimal(int flag) for (int ilevel = 0; ilevel < nlevels; ilevel++) { force_clear(newton[ilevel]); modify->setup_pre_force_respa(vflag,ilevel); - if (level_bond == ilevel && force->bond) - force->bond->compute(eflag,vflag); - if (level_angle == ilevel && force->angle) - force->angle->compute(eflag,vflag); - if (level_dihedral == ilevel && force->dihedral) - force->dihedral->compute(eflag,vflag); - if (level_improper == ilevel && force->improper) - force->improper->compute(eflag,vflag); + + if (nhybrid_styles > 0) { + set_compute_flags(ilevel); + force->pair->compute(eflag,vflag); + } + if (level_pair == ilevel && pair_compute_flag) force->pair->compute(eflag,vflag); if (level_inner == ilevel && pair_compute_flag) @@ -204,6 +212,14 @@ void RespaOMP::setup_minimal(int flag) force->pair->compute_middle(); if (level_outer == ilevel && pair_compute_flag) force->pair->compute_outer(eflag,vflag); + if (level_bond == ilevel && force->bond) + force->bond->compute(eflag,vflag); + if (level_angle == ilevel && force->angle) + force->angle->compute(eflag,vflag); + if (level_dihedral == ilevel && force->dihedral) + force->dihedral->compute(eflag,vflag); + if (level_improper == ilevel && force->improper) + force->improper->compute(eflag,vflag); if (level_kspace == ilevel && force->kspace) { force->kspace->setup(); if (kspace_compute_flag) force->kspace->compute(eflag,vflag); @@ -244,9 +260,11 @@ void RespaOMP::recurse(int ilevel) for (int iloop = 0; iloop < loop[ilevel]; iloop++) { + timer->stamp(); modify->initial_integrate_respa(vflag,ilevel,iloop); if (modify->n_post_integrate_respa) modify->post_integrate_respa(ilevel,iloop); + timer->stamp(Timer::MODIFY); // at outermost level, check on rebuilding neighbor list // at innermost level, communicate @@ -255,7 +273,11 @@ void RespaOMP::recurse(int ilevel) if (ilevel == nlevels-1) { int nflag = neighbor->decide(); if (nflag) { - if (modify->n_pre_exchange) modify->pre_exchange(); + if (modify->n_pre_exchange) { + timer->stamp(); + modify->pre_exchange(); + timer->stamp(Timer::MODIFY); + } if (triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); if (domain->box_change) { @@ -265,20 +287,27 @@ void RespaOMP::recurse(int ilevel) } timer->stamp(); comm->exchange(); - if (atom->sortfreq > 0 && + if (atom->sortfreq > 0 && update->ntimestep >= atom->nextsort) atom->sort(); comm->borders(); if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); - timer->stamp(TIME_COMM); - if (modify->n_pre_neighbor) modify->pre_neighbor(); + timer->stamp(Timer::COMM); + if (modify->n_pre_neighbor) { + modify->pre_neighbor(); + timer->stamp(Timer::MODIFY); + } neighbor->build(); - timer->stamp(TIME_NEIGHBOR); + timer->stamp(Timer::NEIGH); + } else if (ilevel == 0) { + timer->stamp(); + comm->forward_comm(); + timer->stamp(Timer::COMM); } } else if (ilevel == 0) { timer->stamp(); comm->forward_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } // rRESPA recursion thru all levels @@ -295,45 +324,53 @@ void RespaOMP::recurse(int ilevel) // when potentials are invoked at same level force_clear(newton[ilevel]); - if (modify->n_pre_force_respa) + if (modify->n_pre_force_respa) { + timer->stamp(); modify->pre_force_respa(vflag,ilevel,iloop); + timer->stamp(Timer::MODIFY); + } timer->stamp(); - if (level_bond == ilevel && force->bond) { - force->bond->compute(eflag,vflag); - timer->stamp(TIME_BOND); - } - if (level_angle == ilevel && force->angle) { - force->angle->compute(eflag,vflag); - timer->stamp(TIME_BOND); - } - if (level_dihedral == ilevel && force->dihedral) { - force->dihedral->compute(eflag,vflag); - timer->stamp(TIME_BOND); - } - if (level_improper == ilevel && force->improper) { - force->improper->compute(eflag,vflag); - timer->stamp(TIME_BOND); + if (nhybrid_styles > 0) { + set_compute_flags(ilevel); + force->pair->compute(eflag,vflag); + timer->stamp(Timer::PAIR); } if (level_pair == ilevel && pair_compute_flag) { force->pair->compute(eflag,vflag); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } if (level_inner == ilevel && pair_compute_flag) { force->pair->compute_inner(); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } if (level_middle == ilevel && pair_compute_flag) { force->pair->compute_middle(); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } if (level_outer == ilevel && pair_compute_flag) { force->pair->compute_outer(eflag,vflag); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); + } + if (level_bond == ilevel && force->bond) { + force->bond->compute(eflag,vflag); + timer->stamp(Timer::BOND); + } + if (level_angle == ilevel && force->angle) { + force->angle->compute(eflag,vflag); + timer->stamp(Timer::BOND); + } + if (level_dihedral == ilevel && force->dihedral) { + force->dihedral->compute(eflag,vflag); + timer->stamp(Timer::BOND); + } + if (level_improper == ilevel && force->improper) { + force->improper->compute(eflag,vflag); + timer->stamp(Timer::BOND); } if (level_kspace == ilevel && kspace_compute_flag) { force->kspace->compute(eflag,vflag); - timer->stamp(TIME_KSPACE); + timer->stamp(Timer::KSPACE); } // reduce forces from per-thread arrays, if needed @@ -356,14 +393,14 @@ void RespaOMP::recurse(int ilevel) if (newton[ilevel]) { comm->reverse_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } - + timer->stamp(); if (modify->n_post_force_respa) modify->post_force_respa(vflag,ilevel,iloop); modify->final_integrate_respa(ilevel,iloop); + timer->stamp(Timer::MODIFY); } copy_f_flevel(ilevel); } - diff --git a/src/USER-OMP/thr_data.cpp b/src/USER-OMP/thr_data.cpp index 598fb85289..de09dadc58 100644 --- a/src/USER-OMP/thr_data.cpp +++ b/src/USER-OMP/thr_data.cpp @@ -22,16 +22,17 @@ #include <stdio.h> #include "memory.h" +#include "timer.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -ThrData::ThrData(int tid) +ThrData::ThrData(int tid, Timer *t) : _f(0),_torque(0),_erforce(0),_de(0),_drho(0),_mu(0),_lambda(0),_rhoB(0), - _D_values(0),_rho(0),_fp(0),_rho1d(0),_drho1d(0),_tid(tid) + _D_values(0),_rho(0),_fp(0),_rho1d(0),_drho1d(0),_tid(tid), _timer(t) { - // nothing else to do here. + _timer_active = 0; } @@ -45,6 +46,30 @@ void ThrData::check_tid(int tid) /* ---------------------------------------------------------------------- */ +void ThrData::_stamp(enum Timer::ttype flag) +{ + // do nothing until it gets set to 0 in ::setup() + if (_timer_active < 0) return; + + if (flag == Timer::START) { + _timer_active = 1; + } + + if (_timer_active) _timer->stamp(flag); +} + +/* ---------------------------------------------------------------------- */ + +double ThrData::get_time(enum Timer::ttype flag) +{ + if (_timer) + return _timer->get_wall(flag); + else + return 0.0; +} + +/* ---------------------------------------------------------------------- */ + void ThrData::init_force(int nall, double **f, double **torque, double *erforce, double *de, double *drho) { @@ -59,32 +84,29 @@ void ThrData::init_force(int nall, double **f, double **torque, eatom_pair=eatom_bond=eatom_angle=eatom_dihed=eatom_imprp=eatom_kspce=NULL; vatom_pair=vatom_bond=vatom_angle=vatom_dihed=vatom_imprp=vatom_kspce=NULL; - _f = f + _tid*nall; - if (nall > 0) + if (nall > 0 && f) { + _f = f + _tid*nall; memset(&(_f[0][0]),0,nall*3*sizeof(double)); + } else _f = NULL; - if (torque) { + if (nall > 0 && torque) { _torque = torque + _tid*nall; - if (nall > 0) - memset(&(_torque[0][0]),0,nall*3*sizeof(double)); + memset(&(_torque[0][0]),0,nall*3*sizeof(double)); } else _torque = NULL; - if (erforce) { + if (nall > 0 && erforce) { _erforce = erforce + _tid*nall; - if (nall > 0) - memset(&(_erforce[0]),0,nall*sizeof(double)); + memset(&(_erforce[0]),0,nall*sizeof(double)); } else _erforce = NULL; - if (de) { + if (nall > 0 && de) { _de = de + _tid*nall; - if (nall > 0) - memset(&(_de[0]),0,nall*sizeof(double)); + memset(&(_de[0]),0,nall*sizeof(double)); } else _de = NULL; - if (drho) { + if (nall > 0 && drho) { _drho = drho + _tid*nall; - if (nall > 0) - memset(&(_drho[0]),0,nall*sizeof(double)); + memset(&(_drho[0]),0,nall*sizeof(double)); } else _drho = NULL; } diff --git a/src/USER-OMP/thr_data.h b/src/USER-OMP/thr_data.h index bfcb110794..3f1d866a80 100644 --- a/src/USER-OMP/thr_data.h +++ b/src/USER-OMP/thr_data.h @@ -22,6 +22,8 @@ #include <omp.h> #endif +#include "timer.h" + namespace LAMMPS_NS { // per thread data accumulators @@ -32,12 +34,17 @@ class ThrData { friend class ThrOMP; public: - ThrData(int tid); + ThrData(int tid, class Timer *t); ~ThrData() {}; void check_tid(int); // thread id consistency check int get_tid() const { return _tid; }; // our thread id. + // inline wrapper, to make this more efficient + // when per-thread timers are off + void timer(enum Timer::ttype flag) { if (_timer) _stamp(flag); }; + double get_time(enum Timer::ttype flag); + // erase accumulator contents and hook up force arrays void init_force(int, double **, double **, double *, double *, double *); @@ -118,6 +125,12 @@ class ThrData { void *_drho1d_6; // my thread id const int _tid; + // timer info + int _timer_active; + class Timer *_timer; + + private: + void _stamp(enum Timer::ttype flag); public: // compute global per thread virial contribution from global forces and positions diff --git a/src/USER-OMP/thr_omp.cpp b/src/USER-OMP/thr_omp.cpp index 4aea630554..4462f70bf1 100644 --- a/src/USER-OMP/thr_omp.cpp +++ b/src/USER-OMP/thr_omp.cpp @@ -23,6 +23,7 @@ #include "memory.h" #include "modify.h" #include "neighbor.h" +#include "timer.h" #include "thr_omp.h" @@ -183,7 +184,7 @@ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag, // pair_style hybrid will compute fdotr for us // but we first need to reduce the forces data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid); - fix->did_reduce(); + fix->did_reduce(); need_force_reduce = 0; } } @@ -402,6 +403,7 @@ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag, if (lmp->atom->torque) data_reduce_thr(&(lmp->atom->torque[0][0]), nall, nthreads, 3, tid); } + thr->timer(Timer::COMM); } /* ---------------------------------------------------------------------- diff --git a/src/finish.cpp b/src/finish.cpp index ca0a883993..b145809282 100644 --- a/src/finish.cpp +++ b/src/finish.cpp @@ -16,9 +16,10 @@ #include "string.h" #include "stdio.h" #include "finish.h" -#include "lammps.h" +#include "timer.h" #include "universe.h" #include "accelerator_kokkos.h" +#include "accelerator_omp.h" #include "atom.h" #include "atom_vec.h" #include "molecule.h" @@ -30,12 +31,28 @@ #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" -#include "timer.h" #include "output.h" #include "memory.h" +#ifdef LMP_USER_OMP +#include "modify.h" +#include "fix_omp.h" +#include "thr_data.h" +#endif + using namespace LAMMPS_NS; +// local function prototypes, code at end of file + +static void mpi_timings(const char *label, Timer *t, enum Timer::ttype tt, + MPI_Comm world, const int nprocs, const int nthreads, + const int me, double time_loop, FILE *scr, FILE *log); + +#ifdef LMP_USER_OMP +static void omp_times(FixOMP *fix, const char *label, enum Timer::ttype which, + const int nthreads,FILE *scr, FILE *log); +#endif + /* ---------------------------------------------------------------------- */ Finish::Finish(LAMMPS *lmp) : Pointers(lmp) {} @@ -46,14 +63,16 @@ void Finish::end(int flag) { int i,m,nneigh,nneighfull; int histo[10]; - int loopflag,minflag,prdflag,tadflag,timeflag,fftflag,histoflag,neighflag; + int minflag,prdflag,tadflag,timeflag,fftflag,histoflag,neighflag; double time,tmp,ave,max,min; - double time_loop,time_other; + double time_loop,time_other,cpu_loop; int me,nprocs; MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); + const int nthreads = comm->nthreads; + // recompute natoms in case atoms have been lost bigint nblocal = atom->nlocal; @@ -67,8 +86,8 @@ void Finish::end(int flag) // flag = 3 = TAD // turn off neighflag for Kspace partition of verlet/split integrator - loopflag = 1; minflag = prdflag = tadflag = timeflag = fftflag = histoflag = neighflag = 0; + time_loop = cpu_loop = time_other = 0.0; if (flag == 1) { if (update->whichflag == 2) minflag = 1; @@ -80,53 +99,88 @@ void Finish::end(int flag) if (force->kspace && force->kspace_match("pppm",0) && force->kspace->fftbench) fftflag = 1; } - if (flag == 2) prdflag = histoflag = neighflag = 1; + if (flag == 2) prdflag = timeflag = histoflag = neighflag = 1; if (flag == 3) tadflag = histoflag = neighflag = 1; // loop stats - if (loopflag) { - time_other = timer->array[TIME_LOOP] - - (timer->array[TIME_PAIR] + timer->array[TIME_BOND] + - timer->array[TIME_KSPACE] + timer->array[TIME_NEIGHBOR] + - timer->array[TIME_COMM] + timer->array[TIME_OUTPUT]); + if (timer->has_loop()) { + + // overall loop time - time_loop = timer->array[TIME_LOOP]; + time_loop = timer->get_wall(Timer::TOTAL); + cpu_loop = timer->get_cpu(Timer::TOTAL); MPI_Allreduce(&time_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world); time_loop = tmp/nprocs; + MPI_Allreduce(&cpu_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + cpu_loop = tmp/nprocs; + if (time_loop > 0.0) cpu_loop = cpu_loop/time_loop*100.0; + + if (me == 0) { + int ntasks = nprocs * nthreads; + const char fmt1[] = "Loop time of %g on %d procs " + "for %d steps with " BIGINT_FORMAT " atoms\n\n"; + if (screen) fprintf(screen,fmt1,time_loop,ntasks,update->nsteps, + atom->natoms,cpu_loop); + if (logfile) fprintf(logfile,fmt1,time_loop,ntasks,update->nsteps, + atom->natoms,cpu_loop); + + // Gromacs/NAMD-style performance metric for suitable unit settings + + if ( timeflag && !minflag && !prdflag && !tadflag && + (update->nsteps > 0) && (update->dt != 0.0) && + ((strcmp(update->unit_style,"lj") == 0) || + (strcmp(update->unit_style,"metal") == 0) || + (strcmp(update->unit_style,"micro") == 0) || + (strcmp(update->unit_style,"nano") == 0) || + (strcmp(update->unit_style,"electron") == 0) || + (strcmp(update->unit_style,"real") == 0)) ) { + double one_fs = force->femtosecond; + double t_step = ((double) time_loop) / ((double) update->nsteps); + double step_t = 1.0/t_step; + + if (strcmp(update->unit_style,"lj") == 0) { + double tau_day = 24.0*3600.0 / t_step * update->dt / one_fs; + const char perf[] = "Performance: %.3f tau/day, %.3f timesteps/s\n"; + if (screen) fprintf(screen,perf,tau_day,step_t); + if (logfile) fprintf(logfile,perf,tau_day,step_t); + } else { + double hrs_ns = t_step / update->dt * 1000000.0 * one_fs / 3600.0; + double ns_day = 24.0*3600.0 / t_step * update->dt / one_fs/1000000.0; + const char perf[] = + "Performance: %.3f ns/day, %.3f hours/ns, %.3f timesteps/s\n"; + if (screen) fprintf(screen,perf,ns_day,hrs_ns,step_t); + if (logfile) fprintf(logfile,perf,ns_day,hrs_ns,step_t); + } + } - // overall loop time + // CPU use on MPI tasks and OpenMP threads -#if defined(_OPENMP) - if (me == 0) { - int ntasks = nprocs * comm->nthreads; - if (screen) fprintf(screen, - "Loop time of %g on %d procs (%d MPI x %d OpenMP) " - "for %d steps with " BIGINT_FORMAT " atoms\n", - time_loop,ntasks,nprocs,comm->nthreads, - update->nsteps,atom->natoms); - if (logfile) fprintf(logfile, - "Loop time of %g on %d procs (%d MPI x %d OpenMP) " - "for %d steps with " BIGINT_FORMAT " atoms\n", - time_loop,ntasks,nprocs,comm->nthreads, - update->nsteps,atom->natoms); - } +#ifdef LMP_USER_OMP + const char fmt2[] = + "%.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n"; + if (screen) fprintf(screen,fmt2,cpu_loop,nprocs,nthreads); + if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs,nthreads); #else - if (me == 0) { - if (screen) fprintf(screen, - "Loop time of %g on %d procs for %d steps with " - BIGINT_FORMAT " atoms\n", - time_loop,nprocs,update->nsteps,atom->natoms); - if (logfile) fprintf(logfile, - "Loop time of %g on %d procs for %d steps with " - BIGINT_FORMAT " atoms\n", - time_loop,nprocs,update->nsteps,atom->natoms); - } + const char fmt2[] = + "%.1f%% CPU use with %d MPI tasks x no OpenMP threads\n"; + if (screen) fprintf(screen,fmt2,cpu_loop,nprocs); + if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs); #endif - if (time_loop == 0.0) time_loop = 1.0; + } } + // avoid division by zero for very short runs + + if (time_loop == 0.0) time_loop = 1.0; + if (cpu_loop == 0.0) cpu_loop = 100.0; + + // get "Other" wall time for later use + + if (timer->has_normal()) + time_other = timer->get_wall(Timer::TOTAL) - timer->get_wall(Timer::ALL); + // minimization stats if (minflag) { @@ -190,7 +244,7 @@ void Finish::end(int flag) if (screen) fprintf(screen,"PRD stats:\n"); if (logfile) fprintf(logfile,"PRD stats:\n"); - time = timer->array[TIME_PAIR]; + time = timer->get_wall(Timer::DEPHASE); MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); time = tmp/nprocs; if (me == 0) { @@ -202,7 +256,7 @@ void Finish::end(int flag) time,time/time_loop*100.0); } - time = timer->array[TIME_BOND]; + time = timer->get_wall(Timer::DYNAMICS); MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); time = tmp/nprocs; if (me == 0) { @@ -214,7 +268,7 @@ void Finish::end(int flag) time,time/time_loop*100.0); } - time = timer->array[TIME_KSPACE]; + time = timer->get_wall(Timer::QUENCH); MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); time = tmp/nprocs; if (me == 0) { @@ -226,10 +280,35 @@ void Finish::end(int flag) time,time/time_loop*100.0); } - time = time_other; + time = timer->get_wall(Timer::REPCOMM); MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); time = tmp/nprocs; if (me == 0) { + if (screen) + fprintf(screen," Comm time (%%) = %g (%g)\n", + time,time/time_loop*100.0); + if (logfile) + fprintf(logfile," Comm time (%%) = %g (%g)\n", + time,time/time_loop*100.0); + } + + + time = timer->get_wall(Timer::REPOUT); + MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + time = tmp/nprocs; + if (me == 0) { + if (screen) + fprintf(screen," Output time (%%) = %g (%g)\n", + time,time/time_loop*100.0); + if (logfile) + fprintf(logfile," Output time (%%) = %g (%g)\n", + time,time/time_loop*100.0); + } + + time = time_other; + MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + time = tmp/nprocs; + if (me == 0) { // XXXX: replica comm, replica output if (screen) fprintf(screen," Other time (%%) = %g (%g)\n", time,time/time_loop*100.0); @@ -250,7 +329,7 @@ void Finish::end(int flag) if (screen) fprintf(screen,"TAD stats:\n"); if (logfile) fprintf(logfile,"TAD stats:\n"); - time = timer->array[TIME_PAIR]; + time = timer->get_wall(Timer::NEB); MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); time = tmp/nprocs; if (me == 0) { @@ -262,7 +341,7 @@ void Finish::end(int flag) time,time/time_loop*100.0); } - time = timer->array[TIME_BOND]; + time = timer->get_wall(Timer::DYNAMICS); MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); time = tmp/nprocs; if (me == 0) { @@ -274,7 +353,7 @@ void Finish::end(int flag) time,time/time_loop*100.0); } - time = timer->array[TIME_KSPACE]; + time = timer->get_wall(Timer::QUENCH); MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); time = tmp/nprocs; if (me == 0) { @@ -287,7 +366,7 @@ void Finish::end(int flag) } - time = timer->array[TIME_COMM]; + time = timer->get_wall(Timer::REPCOMM); MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); time = tmp/nprocs; if (me == 0) { @@ -300,7 +379,7 @@ void Finish::end(int flag) } - time = timer->array[TIME_OUTPUT]; + time = timer->get_wall(Timer::REPOUT); MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); time = tmp/nprocs; if (me == 0) { @@ -325,102 +404,109 @@ void Finish::end(int flag) } } - // timing breakdowns - - if (timeflag) { - if (me == 0) { - if (screen) fprintf(screen,"\n"); - if (logfile) fprintf(logfile,"\n"); - } - - time = timer->array[TIME_PAIR]; - MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - time = tmp/nprocs; - if (me == 0) { - if (screen) - fprintf(screen,"Pair time (%%) = %g (%g)\n", - time,time/time_loop*100.0); - if (logfile) - fprintf(logfile,"Pair time (%%) = %g (%g)\n", - time,time/time_loop*100.0); - } + if (timeflag && timer->has_normal()) { - if (atom->molecular) { - time = timer->array[TIME_BOND]; - MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - time = tmp/nprocs; + if (timer->has_full()) { + const char hdr[] = "\nMPI task timing breakdown:\n" + "Section | min time | avg time | max time |%varavg| %CPU | %total\n" + "-----------------------------------------------------------------------\n"; if (me == 0) { - if (screen) - fprintf(screen,"Bond time (%%) = %g (%g)\n", - time,time/time_loop*100.0); - if (logfile) - fprintf(logfile,"Bond time (%%) = %g (%g)\n", - time,time/time_loop*100.0); + if (screen) fputs(hdr,screen); + if (logfile) fputs(hdr,logfile); } - } - - if (force->kspace) { - time = timer->array[TIME_KSPACE]; - MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - time = tmp/nprocs; + } else { + const char hdr[] = "\nMPI task timing breakdown:\n" + "Section | min time | avg time | max time |%varavg| %total\n" + "---------------------------------------------------------------\n"; if (me == 0) { - if (screen) - fprintf(screen,"Kspce time (%%) = %g (%g)\n", - time,time/time_loop*100.0); - if (logfile) - fprintf(logfile,"Kspce time (%%) = %g (%g)\n", - time,time/time_loop*100.0); + if (screen) fputs(hdr,screen); + if (logfile) fputs(hdr,logfile); } } - time = timer->array[TIME_NEIGHBOR]; - MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - time = tmp/nprocs; - if (me == 0) { - if (screen) - fprintf(screen,"Neigh time (%%) = %g (%g)\n", - time,time/time_loop*100.0); - if (logfile) - fprintf(logfile,"Neigh time (%%) = %g (%g)\n", - time,time/time_loop*100.0); - } + mpi_timings("Pair",timer,Timer::PAIR, world,nprocs, + nthreads,me,time_loop,screen,logfile); + + if (atom->molecular) + mpi_timings("Bond",timer,Timer::BOND,world,nprocs, + nthreads,me,time_loop,screen,logfile); + + if (force->kspace) + mpi_timings("Kspace",timer,Timer::KSPACE,world,nprocs, + nthreads,me,time_loop,screen,logfile); + + mpi_timings("Neigh",timer,Timer::NEIGH,world,nprocs, + nthreads,me,time_loop,screen,logfile); + mpi_timings("Comm",timer,Timer::COMM,world,nprocs, + nthreads,me,time_loop,screen,logfile); + mpi_timings("Output",timer,Timer::OUTPUT,world,nprocs, + nthreads,me,time_loop,screen,logfile); + mpi_timings("Modify",timer,Timer::MODIFY,world,nprocs, + nthreads,me,time_loop,screen,logfile); + if (timer->has_sync()) + mpi_timings("Sync",timer,Timer::SYNC,world,nprocs, + nthreads,me,time_loop,screen,logfile); - time = timer->array[TIME_COMM]; + time = time_other; MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); time = tmp/nprocs; + + const char *fmt; + if (timer->has_full()) + fmt = "Other | |%- 12.4g| | | |%6.2f\n"; + else + fmt = "Other | |%- 12.4g| | |%6.2f\n"; + if (me == 0) { - if (screen) - fprintf(screen,"Comm time (%%) = %g (%g)\n", - time,time/time_loop*100.0); - if (logfile) - fprintf(logfile,"Comm time (%%) = %g (%g)\n", - time,time/time_loop*100.0); + if (screen) fprintf(screen,fmt,time,time/time_loop*100.0); + if (logfile) fprintf(logfile,fmt,time,time/time_loop*100.0); } + } - time = timer->array[TIME_OUTPUT]; - MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - time = tmp/nprocs; - if (me == 0) { - if (screen) - fprintf(screen,"Outpt time (%%) = %g (%g)\n", - time,time/time_loop*100.0); - if (logfile) - fprintf(logfile,"Outpt time (%%) = %g (%g)\n", - time,time/time_loop*100.0); +#ifdef LMP_USER_OMP + const char thr_hdr_fmt[] = + "\nThread timing breakdown (MPI rank %d):\nTotal threaded time %.4g / %.1f%%\n"; + const char thr_header[] = + "Section | min time | avg time | max time |%varavg| %total\n" + "---------------------------------------------------------------\n"; + + int ifix = modify->find_fix("package_omp"); + + // print thread breakdown only with full timer detail + + if ((ifix >= 0) && timer->has_full() && me == 0) { + double thr_total = 0.0; + ThrData *td; + FixOMP *fixomp = static_cast<FixOMP *>(lmp->modify->fix[ifix]); + for (i=0; i < nthreads; ++i) { + td = fixomp->get_thr(i); + thr_total += td->get_time(Timer::ALL); } + thr_total /= (double) nthreads; - time = time_other; - MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - time = tmp/nprocs; - if (me == 0) { - if (screen) - fprintf(screen,"Other time (%%) = %g (%g)\n", - time,time/time_loop*100.0); - if (logfile) - fprintf(logfile,"Other time (%%) = %g (%g)\n", - time,time/time_loop*100.0); + if (thr_total > 0.0) { + if (screen) { + fprintf(screen,thr_hdr_fmt,me,thr_total,thr_total/time_loop*100.0); + fputs(thr_header,screen); + } + if (logfile) { + fprintf(logfile,thr_hdr_fmt,me,thr_total,thr_total/time_loop*100.0); + fputs(thr_header,logfile); + } + + omp_times(fixomp,"Pair",Timer::PAIR,nthreads,screen,logfile); + + if (atom->molecular) + omp_times(fixomp,"Bond",Timer::BOND,nthreads,screen,logfile); + + if (force->kspace) + omp_times(fixomp,"Kspace",Timer::KSPACE,nthreads,screen,logfile); + + omp_times(fixomp,"Neigh",Timer::NEIGH,nthreads,screen,logfile); + omp_times(fixomp,"Reduce",Timer::COMM,nthreads,screen,logfile); } } +#endif // FFT timing statistics // time3d,time1d = total time during run for 3d and 1d FFTs @@ -459,7 +545,7 @@ void Finish::end(int flag) MPI_Allreduce(&time1d,&tmp,1,MPI_DOUBLE,MPI_SUM,world); time1d = tmp/nprocs; - double time_kspace = timer->array[TIME_KSPACE]; + double time_kspace = timer->get_wall(Timer::KSPACE); MPI_Allreduce(&time_kspace,&tmp,1,MPI_DOUBLE,MPI_SUM,world); time_kspace = tmp/nprocs; @@ -536,8 +622,10 @@ void Finish::end(int flag) neighbor->old_requests[m]->gran || neighbor->old_requests[m]->respaouter || neighbor->old_requests[m]->half_from_full) && - neighbor->old_requests[m]->skip == 0) { - if (lmp->kokkos && lmp->kokkos->neigh_list_kokkos(m)) break; + neighbor->old_requests[m]->skip == 0 && + neighbor->lists[m] && neighbor->lists[m]->numneigh) { + if (!neighbor->lists[m] && lmp->kokkos && + lmp->kokkos->neigh_list_kokkos(m)) break; else break; } } @@ -584,13 +672,14 @@ void Finish::end(int flag) nneighfull = 0; if (m < neighbor->old_nrequest) { - if (neighbor->lists[m]) { + if (neighbor->lists[m] && neighbor->lists[m]->numneigh) { int inum = neighbor->lists[m]->inum; int *ilist = neighbor->lists[m]->ilist; int *numneigh = neighbor->lists[m]->numneigh; for (i = 0; i < inum; i++) nneighfull += numneigh[ilist[i]]; - } else if (lmp->kokkos) nneighfull = lmp->kokkos->neigh_count(m); + } else if (!neighbor->lists[m] && lmp->kokkos) + nneighfull = lmp->kokkos->neigh_count(m); tmp = nneighfull; stats(1,&tmp,&ave,&max,&min,10,histo); @@ -622,7 +711,7 @@ void Finish::end(int flag) MPI_Allreduce(&tmp,&nall,1,MPI_DOUBLE,MPI_SUM,world); int nspec; - double nspec_all; + double nspec_all = 0; if (atom->molecular == 1) { int **nspecial = atom->nspecial; int nlocal = atom->nlocal; @@ -731,3 +820,95 @@ void Finish::stats(int n, double *data, *pmax = max; *pmin = min; } + +/* ---------------------------------------------------------------------- */ + +static void mpi_timings(const char *label, Timer *t, enum Timer::ttype tt, + MPI_Comm world, const int nprocs, const int nthreads, + const int me, double time_loop, FILE *scr, FILE *log) +{ + double tmp, time_max, time_min, time_sq; + double time = t->get_wall(tt); + + double time_cpu = t->get_cpu(tt); + if (time/time_loop < 0.001) // insufficient timer resolution! + time_cpu = 1.0; + else + time_cpu = time_cpu / time; + if (time_cpu > nthreads) time_cpu = nthreads; + + MPI_Allreduce(&time,&time_min,1,MPI_DOUBLE,MPI_MIN,world); + MPI_Allreduce(&time,&time_max,1,MPI_DOUBLE,MPI_MAX,world); + time_sq = time*time; + MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + time = tmp/nprocs; + MPI_Allreduce(&time_sq,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + time_sq = tmp/nprocs; + MPI_Allreduce(&time_cpu,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + time_cpu = tmp/nprocs*100.0; + + // % variance from the average as measure of load imbalance + if (time > 1.0e-10) + time_sq = sqrt(time_sq/time - time)*100.0; + else + time_sq = 0.0; + + + if (me == 0) { + tmp = time/time_loop*100.0; + if (t->has_full()) { + const char fmt[] = "%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.1f |%6.2f\n"; + if (scr) + fprintf(scr,fmt,label,time_min,time,time_max,time_sq,time_cpu,tmp); + if (log) + fprintf(log,fmt,label,time_min,time,time_max,time_sq,time_cpu,tmp); + time_loop = 100.0/time_loop; + } else { + const char fmt[] = "%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.2f\n"; + if (scr) + fprintf(scr,fmt,label,time_min,time,time_max,time_sq,tmp); + if (log) + fprintf(log,fmt,label,time_min,time,time_max,time_sq,tmp); + } + } +} + +/* ---------------------------------------------------------------------- */ + +#ifdef LMP_USER_OMP +static void omp_times(FixOMP *fix, const char *label, enum Timer::ttype which, + const int nthreads,FILE *scr, FILE *log) +{ + const char fmt[] = "%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.2f\n"; + double time_min, time_max, time_avg, time_total, time_std; + + time_min = 1.0e100; + time_max = -1.0e100; + time_total = time_avg = time_std = 0.0; + + for (int i=0; i < nthreads; ++i) { + ThrData *thr = fix->get_thr(i); + double tmp=thr->get_time(which); + time_min = MIN(time_min,tmp); + time_max = MAX(time_max,tmp); + time_avg += tmp; + time_std += tmp*tmp; + time_total += thr->get_time(Timer::ALL); + } + + time_avg /= nthreads; + time_std /= nthreads; + time_total /= nthreads; + + if (time_avg > 1.0e-10) + time_std = sqrt(time_std/time_avg - time_avg)*100.0; + else + time_std = 0.0; + + if (scr) fprintf(scr,fmt,label,time_min,time_avg,time_max,time_std, + time_avg/time_total*100.0); + if (log) fprintf(log,fmt,label,time_min,time_avg,time_max,time_std, + time_avg/time_total*100.0); +} +#endif + diff --git a/src/input.cpp b/src/input.cpp index 455b8dc869..da1b8a0bfc 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -43,6 +43,7 @@ #include "update.h" #include "neighbor.h" #include "special.h" +#include "timer.h" #include "variable.h" #include "accelerator_cuda.h" #include "accelerator_kokkos.h" @@ -685,6 +686,7 @@ int Input::execute_command() else if (!strcmp(command,"thermo_modify")) thermo_modify(); else if (!strcmp(command,"thermo_style")) thermo_style(); else if (!strcmp(command,"timestep")) timestep(); + else if (!strcmp(command,"timers")) timers(); else if (!strcmp(command,"uncompute")) uncompute(); else if (!strcmp(command,"undump")) undump(); else if (!strcmp(command,"unfix")) unfix(); @@ -1037,6 +1039,7 @@ void Input::print() if (strcmp(arg[iarg],"file") == 0 || strcmp(arg[iarg],"append") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal print command"); if (me == 0) { + if (fp != NULL) fclose(fp); if (strcmp(arg[iarg],"file") == 0) fp = fopen(arg[iarg+1],"w"); else fp = fopen(arg[iarg+1],"a"); if (fp == NULL) { @@ -1741,6 +1744,13 @@ void Input::thermo_style() /* ---------------------------------------------------------------------- */ +void Input::timers() +{ + timer->modify_params(narg,arg); +} + +/* ---------------------------------------------------------------------- */ + void Input::timestep() { if (narg != 1) error->all(FLERR,"Illegal timestep command"); diff --git a/src/input.h b/src/input.h index 5863e0cefe..6637d97373 100644 --- a/src/input.h +++ b/src/input.h @@ -62,7 +62,7 @@ class Input : protected Pointers { void reallocate(char *&, int &, int); // reallocate a char string int execute_command(); // execute a single command - void clear(); // input script commands + void clear(); // input script commands void echo(); void ifthenelse(); void include(); @@ -77,7 +77,7 @@ class Input : protected Pointers { void shell(); void variable_command(); - void angle_coeff(); // LAMMPS commands + void angle_coeff(); // LAMMPS commands void angle_style(); void atom_modify(); void atom_style(); @@ -126,6 +126,7 @@ class Input : protected Pointers { void thermo_modify(); void thermo_style(); void timestep(); + void timers(); void uncompute(); void undump(); void unfix(); diff --git a/src/min.cpp b/src/min.cpp index 12afac4037..b06ccfc638 100644 --- a/src/min.cpp +++ b/src/min.cpp @@ -459,9 +459,13 @@ double Min::energy_force(int resetflag) if (nflag == 0) { timer->stamp(); comm->forward_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } else { - if (modify->n_min_pre_exchange) modify->min_pre_exchange(); + if (modify->n_min_pre_exchange) { + timer->stamp(); + modify->min_pre_exchange(); + timer->stamp(Timer::MODIFY); + } if (triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); if (domain->box_change) { @@ -475,20 +479,24 @@ double Min::energy_force(int resetflag) update->ntimestep >= atom->nextsort) atom->sort(); comm->borders(); if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); neighbor->build(); - timer->stamp(TIME_NEIGHBOR); + timer->stamp(Timer::NEIGH); } ev_set(update->ntimestep); force_clear(); - if (modify->n_min_pre_force) modify->min_pre_force(vflag); timer->stamp(); + if (modify->n_min_pre_force) { + modify->min_pre_force(vflag); + timer->stamp(Timer::MODIFY); + } + if (pair_compute_flag) { force->pair->compute(eflag,vflag); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } if (atom->molecular) { @@ -496,17 +504,17 @@ double Min::energy_force(int resetflag) if (force->angle) force->angle->compute(eflag,vflag); if (force->dihedral) force->dihedral->compute(eflag,vflag); if (force->improper) force->improper->compute(eflag,vflag); - timer->stamp(TIME_BOND); + timer->stamp(Timer::BOND); } if (kspace_compute_flag) { force->kspace->compute(eflag,vflag); - timer->stamp(TIME_KSPACE); + timer->stamp(Timer::KSPACE); } if (force->newton) { comm->reverse_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } // update per-atom minimization variables stored by pair styles @@ -517,7 +525,11 @@ double Min::energy_force(int resetflag) // fixes that affect minimization - if (modify->n_min_post_force) modify->min_post_force(vflag); + if (modify->n_min_post_force) { + timer->stamp(); + modify->min_post_force(vflag); + timer->stamp(Timer::MODIFY); + } // compute potential energy of system // normalize if thermo PE does diff --git a/src/min_cg.cpp b/src/min_cg.cpp index f88426f587..4953370562 100644 --- a/src/min_cg.cpp +++ b/src/min_cg.cpp @@ -175,7 +175,7 @@ int MinCG::iterate(int maxiter) if (output->next == ntimestep) { timer->stamp(); output->write(ntimestep); - timer->stamp(TIME_OUTPUT); + timer->stamp(Timer::OUTPUT); } } diff --git a/src/min_fire.cpp b/src/min_fire.cpp index 6ef26e7eaa..8d0debf349 100644 --- a/src/min_fire.cpp +++ b/src/min_fire.cpp @@ -266,7 +266,7 @@ int MinFire::iterate(int maxiter) if (output->next == ntimestep) { timer->stamp(); output->write(ntimestep); - timer->stamp(TIME_OUTPUT); + timer->stamp(Timer::OUTPUT); } } diff --git a/src/min_hftn.cpp b/src/min_hftn.cpp index cb47934380..cdd9bcda3b 100644 --- a/src/min_hftn.cpp +++ b/src/min_hftn.cpp @@ -535,7 +535,7 @@ int MinHFTN::execute_hftn_(const bool bPrintProgress, } timer->stamp(); output->write (update->ntimestep); - timer->stamp (TIME_OUTPUT); + timer->stamp (Timer::OUTPUT); } //---- RETURN IF NUMBER OF EVALUATIONS EXCEEDED. diff --git a/src/min_quickmin.cpp b/src/min_quickmin.cpp index 7de5dc6c99..124b5bf575 100644 --- a/src/min_quickmin.cpp +++ b/src/min_quickmin.cpp @@ -232,7 +232,7 @@ int MinQuickMin::iterate(int maxiter) if (output->next == ntimestep) { timer->stamp(); output->write(ntimestep); - timer->stamp(TIME_OUTPUT); + timer->stamp(Timer::OUTPUT); } } diff --git a/src/min_sd.cpp b/src/min_sd.cpp index 80cad3e135..44936ce32a 100644 --- a/src/min_sd.cpp +++ b/src/min_sd.cpp @@ -100,7 +100,7 @@ int MinSD::iterate(int maxiter) if (output->next == ntimestep) { timer->stamp(); output->write(ntimestep); - timer->stamp(TIME_OUTPUT); + timer->stamp(Timer::OUTPUT); } } diff --git a/src/minimize.cpp b/src/minimize.cpp index 44a037c49f..b927ee9270 100644 --- a/src/minimize.cpp +++ b/src/minimize.cpp @@ -54,9 +54,9 @@ void Minimize::command(int narg, char **arg) update->minimize->setup(); timer->init(); - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); update->minimize->run(update->nsteps); - timer->barrier_stop(TIME_LOOP); + timer->barrier_stop(); update->minimize->cleanup(); diff --git a/src/neighbor.cpp b/src/neighbor.cpp index 34d8d692eb..493defc7e1 100644 --- a/src/neighbor.cpp +++ b/src/neighbor.cpp @@ -814,7 +814,8 @@ void Neighbor::init() fprintf(logfile," ghost atom cutoff = %g\n",cutghost); if (style != NSQ) fprintf(logfile," binsize = %g -> bins = %g %g %g\n",binsize, - ceil(bbox[0]/binsize), ceil(bbox[1]/binsize), ceil(bbox[2]/binsize)); + ceil(bbox[0]/binsize), ceil(bbox[1]/binsize), + ceil(bbox[2]/binsize)); } if (screen) { fprintf(screen,"Neighbor list info ...\n"); @@ -825,7 +826,8 @@ void Neighbor::init() fprintf(screen," ghost atom cutoff = %g\n",cutghost); if (style != NSQ) fprintf(screen," binsize = %g, bins = %g %g %g\n",binsize, - ceil(bbox[0]/binsize), ceil(bbox[1]/binsize), ceil(bbox[2]/binsize)); + ceil(bbox[0]/binsize), ceil(bbox[1]/binsize), + ceil(bbox[2]/binsize)); } } } diff --git a/src/rerun.cpp b/src/rerun.cpp index 08e7ccf42a..bb98f23dfa 100644 --- a/src/rerun.cpp +++ b/src/rerun.cpp @@ -142,7 +142,7 @@ void Rerun::command(int narg, char **arg) lmp->init(); timer->init(); - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); bigint ntimestep = rd->seek(first,0); if (ntimestep < 0) @@ -172,7 +172,7 @@ void Rerun::command(int narg, char **arg) output->next_thermo = update->ntimestep; output->write(update->ntimestep); - timer->barrier_stop(TIME_LOOP); + timer->barrier_stop(); update->integrate->cleanup(); diff --git a/src/respa.cpp b/src/respa.cpp index 3af1dafcfb..632d2a109f 100644 --- a/src/respa.cpp +++ b/src/respa.cpp @@ -452,6 +452,7 @@ void Respa::setup() force->kspace->setup(); if (kspace_compute_flag) force->kspace->compute(eflag,vflag); } + if (newton[ilevel]) comm->reverse_comm(); copy_f_flevel(ilevel); } @@ -554,12 +555,16 @@ void Respa::run(int n) sum_flevel_f(); - if (modify->n_end_of_step) modify->end_of_step(); + if (modify->n_end_of_step) { + timer->stamp(); + modify->end_of_step(); + timer->stamp(Timer::MODIFY); + } if (ntimestep == output->next) { timer->stamp(); output->write(update->ntimestep); - timer->stamp(TIME_OUTPUT); + timer->stamp(Timer::OUTPUT); } } } @@ -593,9 +598,11 @@ void Respa::recurse(int ilevel) for (int iloop = 0; iloop < loop[ilevel]; iloop++) { + timer->stamp(); modify->initial_integrate_respa(vflag,ilevel,iloop); if (modify->n_post_integrate_respa) modify->post_integrate_respa(ilevel,iloop); + timer->stamp(Timer::MODIFY); // at outermost level, check on rebuilding neighbor list // at innermost level, communicate @@ -604,7 +611,11 @@ void Respa::recurse(int ilevel) if (ilevel == nlevels-1) { int nflag = neighbor->decide(); if (nflag) { - if (modify->n_pre_exchange) modify->pre_exchange(); + if (modify->n_pre_exchange) { + timer->stamp(); + modify->pre_exchange(); + timer->stamp(Timer::MODIFY); + } if (triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); if (domain->box_change) { @@ -614,24 +625,27 @@ void Respa::recurse(int ilevel) } timer->stamp(); comm->exchange(); - if (atom->sortfreq > 0 && + if (atom->sortfreq > 0 && update->ntimestep >= atom->nextsort) atom->sort(); comm->borders(); if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); - timer->stamp(TIME_COMM); - if (modify->n_pre_neighbor) modify->pre_neighbor(); + timer->stamp(Timer::COMM); + if (modify->n_pre_neighbor) { + modify->pre_neighbor(); + timer->stamp(Timer::MODIFY); + } neighbor->build(); - timer->stamp(TIME_NEIGHBOR); + timer->stamp(Timer::NEIGH); } else if (ilevel == 0) { timer->stamp(); comm->forward_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } } else if (ilevel == 0) { timer->stamp(); comm->forward_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } // rRESPA recursion thru all levels @@ -648,60 +662,64 @@ void Respa::recurse(int ilevel) // when potentials are invoked at same level force_clear(newton[ilevel]); - if (modify->n_pre_force_respa) + if (modify->n_pre_force_respa) { + timer->stamp(); modify->pre_force_respa(vflag,ilevel,iloop); + timer->stamp(Timer::MODIFY); + } timer->stamp(); if (nhybrid_styles > 0) { set_compute_flags(ilevel); force->pair->compute(eflag,vflag); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } if (level_pair == ilevel && pair_compute_flag) { force->pair->compute(eflag,vflag); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } if (level_inner == ilevel && pair_compute_flag) { force->pair->compute_inner(); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } if (level_middle == ilevel && pair_compute_flag) { force->pair->compute_middle(); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } if (level_outer == ilevel && pair_compute_flag) { force->pair->compute_outer(eflag,vflag); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } if (level_bond == ilevel && force->bond) { force->bond->compute(eflag,vflag); - timer->stamp(TIME_BOND); + timer->stamp(Timer::BOND); } if (level_angle == ilevel && force->angle) { force->angle->compute(eflag,vflag); - timer->stamp(TIME_BOND); + timer->stamp(Timer::BOND); } if (level_dihedral == ilevel && force->dihedral) { force->dihedral->compute(eflag,vflag); - timer->stamp(TIME_BOND); + timer->stamp(Timer::BOND); } if (level_improper == ilevel && force->improper) { force->improper->compute(eflag,vflag); - timer->stamp(TIME_BOND); + timer->stamp(Timer::BOND); } if (level_kspace == ilevel && kspace_compute_flag) { force->kspace->compute(eflag,vflag); - timer->stamp(TIME_KSPACE); + timer->stamp(Timer::KSPACE); } if (newton[ilevel]) { comm->reverse_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } - + timer->stamp(); if (modify->n_post_force_respa) modify->post_force_respa(vflag,ilevel,iloop); modify->final_integrate_respa(ilevel,iloop); + timer->stamp(Timer::MODIFY); } copy_f_flevel(ilevel); diff --git a/src/run.cpp b/src/run.cpp index 455f5b07a8..62b888114f 100644 --- a/src/run.cpp +++ b/src/run.cpp @@ -171,9 +171,9 @@ void Run::command(int narg, char **arg) } else output->setup(0); timer->init(); - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); update->integrate->run(nsteps); - timer->barrier_stop(TIME_LOOP); + timer->barrier_stop(); update->integrate->cleanup(); @@ -209,9 +209,9 @@ void Run::command(int narg, char **arg) } else output->setup(0); timer->init(); - timer->barrier_start(TIME_LOOP); + timer->barrier_start(); update->integrate->run(nsteps); - timer->barrier_stop(TIME_LOOP); + timer->barrier_stop(); update->integrate->cleanup(); diff --git a/src/thermo.cpp b/src/thermo.cpp index ee7ab0fa76..bef74d0358 100644 --- a/src/thermo.cpp +++ b/src/thermo.cpp @@ -46,6 +46,9 @@ #include "math_const.h" #include "memory.h" #include "error.h" +#include "universe.h" + +#include "math_const.h" using namespace LAMMPS_NS; using namespace MathConst; @@ -335,7 +338,7 @@ void Thermo::compute(int flag) int loc = 0; if (lineflag == MULTILINE) { double cpu; - if (flag) cpu = timer->elapsed(TIME_LOOP); + if (flag) cpu = timer->elapsed(Timer::TOTAL); else cpu = 0.0; loc = sprintf(&line[loc],format_multi,ntimestep,cpu); } @@ -1520,7 +1523,7 @@ void Thermo::compute_time() void Thermo::compute_cpu() { if (firststep == 0) dvalue = 0.0; - else dvalue = timer->elapsed(TIME_LOOP); + else dvalue = timer->elapsed(Timer::TOTAL); } /* ---------------------------------------------------------------------- */ @@ -1534,7 +1537,7 @@ void Thermo::compute_tpcpu() new_cpu = 0.0; dvalue = 0.0; } else { - new_cpu = timer->elapsed(TIME_LOOP); + new_cpu = timer->elapsed(Timer::TOTAL); double cpu_diff = new_cpu - last_tpcpu; double time_diff = new_time - last_time; if (time_diff > 0.0 && cpu_diff > 0.0) dvalue = time_diff/cpu_diff; @@ -1556,7 +1559,7 @@ void Thermo::compute_spcpu() new_cpu = 0.0; dvalue = 0.0; } else { - new_cpu = timer->elapsed(TIME_LOOP); + new_cpu = timer->elapsed(Timer::TOTAL); double cpu_diff = new_cpu - last_spcpu; int step_diff = new_step - last_step; if (cpu_diff > 0.0) dvalue = step_diff/cpu_diff; @@ -1572,7 +1575,7 @@ void Thermo::compute_spcpu() void Thermo::compute_cpuremain() { if (firststep == 0) dvalue = 0.0; - else dvalue = timer->elapsed(TIME_LOOP) * + else dvalue = timer->elapsed(Timer::TOTAL) * (update->laststep - update->ntimestep) / (update->ntimestep - update->firststep); } @@ -2093,3 +2096,4 @@ void Thermo::compute_cellgamma() dvalue = acos(cosgamma)*180.0/MY_PI; } } + diff --git a/src/timer.cpp b/src/timer.cpp index 329de00d60..2fb8453684 100644 --- a/src/timer.cpp +++ b/src/timer.cpp @@ -12,73 +12,201 @@ ------------------------------------------------------------------------- */ #include "mpi.h" +#include "string.h" #include "timer.h" +#include "comm.h" +#include "error.h" #include "memory.h" +#ifdef _WIN32 +#include <windows.h> +#include <stdint.h> +#else +#include <sys/time.h> +#include <sys/resource.h> +#endif + using namespace LAMMPS_NS; -/* ---------------------------------------------------------------------- */ +// Return the CPU time for the current process in seconds very +// much in the same way as MPI_Wtime() returns the wall time. -Timer::Timer(LAMMPS *lmp) : Pointers(lmp) +static double CPU_Time() { - memory->create(array,TIME_N,"array"); + double rv = 0.0; + +#ifdef _WIN32 + + // from MSD docs. + FILETIME ct,et,kt,ut; + union { FILETIME ft; uint64_t ui; } cpu; + if (GetProcessTimes(GetCurrentProcess(),&ct,&et,&kt,&ut)) { + cpu.ft = ut; + rv = cpu.ui * 0.0000001; + } + +#else /* ! _WIN32 */ + + struct rusage ru; + if (getrusage(RUSAGE_SELF, &ru) == 0) { + rv = (double) ru.ru_utime.tv_sec; + rv += (double) ru.ru_utime.tv_usec * 0.000001; + } + +#endif /* ! _WIN32 */ + + return rv; } /* ---------------------------------------------------------------------- */ -Timer::~Timer() +Timer::Timer(LAMMPS *lmp) : Pointers(lmp) { - memory->destroy(array); + _level = NORMAL; + _sync = OFF; } /* ---------------------------------------------------------------------- */ void Timer::init() { - for (int i = 0; i < TIME_N; i++) array[i] = 0.0; + for (int i = 0; i < NUM_TIMER; i++) { + cpu_array[i] = 0.0; + wall_array[i] = 0.0; + } } /* ---------------------------------------------------------------------- */ -void Timer::stamp() +void Timer::_stamp(enum ttype which) { - // uncomment if want synchronized timing - // MPI_Barrier(world); - previous_time = MPI_Wtime(); + double current_cpu=0.0, current_wall=0.0; + + if (_level > NORMAL) current_cpu = CPU_Time(); + current_wall = MPI_Wtime(); + + if ((which > TOTAL) && (which < NUM_TIMER)) { + const double delta_cpu = current_cpu - previous_cpu; + const double delta_wall = current_wall - previous_wall; + + cpu_array[which] += delta_cpu; + wall_array[which] += delta_wall; + cpu_array[ALL] += delta_cpu; + wall_array[ALL] += delta_wall; + } + + previous_cpu = current_cpu; + previous_wall = current_wall; + + if (which == RESET) { + this->init(); + cpu_array[TOTAL] = current_cpu; + wall_array[TOTAL] = current_wall; + } + + if (_sync) { + MPI_Barrier(world); + if (_level > NORMAL) current_cpu = CPU_Time(); + current_wall = MPI_Wtime(); + + cpu_array[SYNC] += current_cpu - previous_cpu; + wall_array[SYNC] += current_wall - previous_wall; + previous_cpu = current_cpu; + previous_wall = current_wall; + } } /* ---------------------------------------------------------------------- */ -void Timer::stamp(int which) +void Timer::barrier_start() { - // uncomment if want synchronized timing - // MPI_Barrier(world); - double current_time = MPI_Wtime(); - array[which] += current_time - previous_time; - previous_time = current_time; + double current_cpu=0.0, current_wall=0.0; + + MPI_Barrier(world); + + if (_level < LOOP) return; + + current_cpu = CPU_Time(); + current_wall = MPI_Wtime(); + + cpu_array[TOTAL] = current_cpu; + wall_array[TOTAL] = current_wall; + previous_cpu = current_cpu; + previous_wall = current_wall; } /* ---------------------------------------------------------------------- */ -void Timer::barrier_start(int which) +void Timer::barrier_stop() { + double current_cpu=0.0, current_wall=0.0; + MPI_Barrier(world); - array[which] = MPI_Wtime(); + + if (_level < LOOP) return; + + current_cpu = CPU_Time(); + current_wall = MPI_Wtime(); + + cpu_array[TOTAL] = current_cpu - cpu_array[TOTAL]; + wall_array[TOTAL] = current_wall - wall_array[TOTAL]; } /* ---------------------------------------------------------------------- */ -void Timer::barrier_stop(int which) +double Timer::cpu(enum ttype which) { - MPI_Barrier(world); - double current_time = MPI_Wtime(); - array[which] = current_time - array[which]; + double current_cpu = CPU_Time(); + return (current_cpu - cpu_array[which]); } /* ---------------------------------------------------------------------- */ -double Timer::elapsed(int which) +double Timer::elapsed(enum ttype which) +{ + if (_level == OFF) return 0.0; + double current_wall = MPI_Wtime(); + return (current_wall - wall_array[which]); +} + +/* ---------------------------------------------------------------------- */ + +void Timer::set_wall(enum ttype which, double newtime) +{ + wall_array[which] = newtime; +} + +/* ---------------------------------------------------------------------- + modify parameters of the Timer class +------------------------------------------------------------------------- */ +static const char *timer_style[] = { "off", "loop", "normal", "full" }; +static const char *timer_mode[] = { "nosync", "(dummy)", "sync" }; +static const char timer_fmt[] = "New timer settings: style=%s mode=%s\n"; + +void Timer::modify_params(int narg, char **arg) { - double current_time = MPI_Wtime(); - return (current_time - array[which]); + int iarg = 0; + while (iarg < narg) { + if (strcmp(arg[iarg],timer_style[OFF]) == 0) { + _level = OFF; + } else if (strcmp(arg[iarg],timer_style[LOOP]) == 0) { + _level = LOOP; + } else if (strcmp(arg[iarg],timer_style[NORMAL]) == 0) { + _level = NORMAL; + } else if (strcmp(arg[iarg],timer_style[FULL]) == 0) { + _level = FULL; + } else if (strcmp(arg[iarg],timer_mode[OFF]) == 0) { + _sync = OFF; + } else if (strcmp(arg[iarg],timer_mode[NORMAL]) == 0) { + _sync = NORMAL; + } else error->all(FLERR,"Illegal timers command"); + ++iarg; + } + + if (comm->me == 0) { + if (screen) + fprintf(screen,timer_fmt,timer_style[_level],timer_mode[_sync]); + if (logfile) + fprintf(logfile,timer_fmt,timer_style[_level],timer_mode[_sync]); + } } diff --git a/src/timer.h b/src/timer.h index d62b764c49..adf554f468 100644 --- a/src/timer.h +++ b/src/timer.h @@ -16,26 +16,60 @@ #include "pointers.h" -enum{TIME_LOOP,TIME_PAIR,TIME_BOND,TIME_KSPACE,TIME_NEIGHBOR, - TIME_COMM,TIME_OUTPUT,TIME_N}; namespace LAMMPS_NS { class Timer : protected Pointers { public: - double *array; + + enum ttype {RESET=-2,START=-1,TOTAL=0,PAIR,BOND,KSPACE,NEIGH,COMM, + MODIFY,OUTPUT,SYNC,ALL,DEPHASE,DYNAMICS,QUENCH,NEB,REPCOMM, + REPOUT,NUM_TIMER}; + enum tlevel {OFF=0,LOOP,NORMAL,FULL}; Timer(class LAMMPS *); - ~Timer(); + ~Timer() {}; void init(); - void stamp(); - void stamp(int); - void barrier_start(int); - void barrier_stop(int); - double elapsed(int); + + // inline function to reduce overhead if we want no detailed timings + + void stamp(enum ttype which=START) { + if (_level > LOOP) _stamp(which); + } + + void barrier_start(); + void barrier_stop(); + + // accessor methods for supported level of detail + + bool has_loop() const { return (_level >= LOOP); } + bool has_normal() const { return (_level >= NORMAL); } + bool has_full() const { return (_level >= FULL); } + bool has_sync() const { return (_sync != OFF); } + + double elapsed(enum ttype); + double cpu(enum ttype); + + double get_cpu(enum ttype which) const { + return cpu_array[which]; }; + double get_wall(enum ttype which) const { + return wall_array[which]; }; + + void set_wall(enum ttype, double); + + + void modify_params(int, char **); private: - double previous_time; + double cpu_array[NUM_TIMER]; + double wall_array[NUM_TIMER]; + double previous_cpu; + double previous_wall; + int _level; // level of detail: off=0,loop=1,normal=2,full=3 + int _sync; // if nonzero, synchronize tasks before setting the timer + + // update requested timer array + void _stamp(enum ttype); }; } diff --git a/src/verlet.cpp b/src/verlet.cpp index b052a37bc0..345549d914 100644 --- a/src/verlet.cpp +++ b/src/verlet.cpp @@ -231,8 +231,10 @@ void Verlet::run(int n) // initial time integration + timer->stamp(); modify->initial_integrate(vflag); if (n_post_integrate) modify->post_integrate(); + timer->stamp(Timer::MODIFY); // regular communication vs neighbor list rebuild @@ -241,9 +243,13 @@ void Verlet::run(int n) if (nflag == 0) { timer->stamp(); comm->forward_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } else { - if (n_pre_exchange) modify->pre_exchange(); + if (n_pre_exchange) { + timer->stamp(); + modify->pre_exchange(); + timer->stamp(Timer::MODIFY); + } if (triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); if (domain->box_change) { @@ -256,10 +262,13 @@ void Verlet::run(int n) if (sortflag && ntimestep >= atom->nextsort) atom->sort(); comm->borders(); if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); - timer->stamp(TIME_COMM); - if (n_pre_neighbor) modify->pre_neighbor(); + timer->stamp(Timer::COMM); + if (n_pre_neighbor) { + modify->pre_neighbor(); + timer->stamp(Timer::MODIFY); + } neighbor->build(); - timer->stamp(TIME_NEIGHBOR); + timer->stamp(Timer::NEIGH); } // force computations @@ -268,13 +277,17 @@ void Verlet::run(int n) // and Pair:ev_tally() needs to be called before any tallying force_clear(); - if (n_pre_force) modify->pre_force(vflag); timer->stamp(); + if (n_pre_force) { + modify->pre_force(vflag); + timer->stamp(Timer::MODIFY); + } + if (pair_compute_flag) { force->pair->compute(eflag,vflag); - timer->stamp(TIME_PAIR); + timer->stamp(Timer::PAIR); } if (atom->molecular) { @@ -282,19 +295,19 @@ void Verlet::run(int n) if (force->angle) force->angle->compute(eflag,vflag); if (force->dihedral) force->dihedral->compute(eflag,vflag); if (force->improper) force->improper->compute(eflag,vflag); - timer->stamp(TIME_BOND); + timer->stamp(Timer::BOND); } if (kspace_compute_flag) { force->kspace->compute(eflag,vflag); - timer->stamp(TIME_KSPACE); + timer->stamp(Timer::KSPACE); } // reverse communication of forces if (force->newton) { comm->reverse_comm(); - timer->stamp(TIME_COMM); + timer->stamp(Timer::COMM); } // force modifications, final time integration, diagnostics @@ -302,13 +315,14 @@ void Verlet::run(int n) if (n_post_force) modify->post_force(vflag); modify->final_integrate(); if (n_end_of_step) modify->end_of_step(); + timer->stamp(Timer::MODIFY); // all output if (ntimestep == output->next) { timer->stamp(); output->write(ntimestep); - timer->stamp(TIME_OUTPUT); + timer->stamp(Timer::OUTPUT); } } } @@ -329,7 +343,6 @@ void Verlet::cleanup() void Verlet::force_clear() { - int i; size_t nbytes; if (external_force_clear) return; -- GitLab