From f9c7fa973b29f17bacc2d5dfd7686f08fd529f8d Mon Sep 17 00:00:00 2001 From: Tim Mattox <timothy.mattox@engilitycorp.com> Date: Wed, 18 Jul 2018 10:41:57 -0500 Subject: [PATCH] USER-DPD: propagate a minor performance bugfix throughout the DPDE code The fix_shardlow_kokkos.cpp code had already factored out a redundant sqrt() calculation in the innermost loop of ssa_update_dpde(). This changeset propagates an equivilent optimization to: fix_shardlow.cpp pair_dpd_fdt_energy.cpp pair_dpd_fdt_energy_kokkos.cpp The alpha_ij variable was really just an [itype][jtype] lookup parameter, replacing a sqrt() and two multiplies per interacting particle pair by a cached memory read. Even if there isn't much time savings, the code is now consistent across the various versions. --- src/KOKKOS/fix_shardlow_kokkos.cpp | 3 +-- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 3 ++- src/KOKKOS/pair_dpd_fdt_energy_kokkos.h | 6 +++--- src/USER-DPD/fix_shardlow.cpp | 6 +++--- src/USER-DPD/pair_dpd_fdt_energy.cpp | 10 ++++++++-- src/USER-DPD/pair_dpd_fdt_energy.h | 2 +- 6 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index 70055bf8c9..99e51ebe38 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -157,7 +157,6 @@ void FixShardlowKokkos<DeviceType>::init() k_pairDPDE->k_cutsq.template sync<DeviceType>(); d_cutsq = k_pairDPDE->k_cutsq.template view<DeviceType>(); - const double boltz2 = 2.0*force->boltz; for (int i = 1; i <= ntypes; i++) { for (int j = i; j <= ntypes; j++) { F_FLOAT cutone = k_pairDPDE->cut[i][j]; @@ -165,7 +164,7 @@ void FixShardlowKokkos<DeviceType>::init() else k_params.h_view(i,j).cutinv = FLT_MAX; k_params.h_view(i,j).halfsigma = 0.5*k_pairDPDE->sigma[i][j]; k_params.h_view(i,j).kappa = k_pairDPDE->kappa[i][j]; - k_params.h_view(i,j).alpha = sqrt(boltz2*k_pairDPDE->kappa[i][j]); + k_params.h_view(i,j).alpha = k_pairDPDE->alpha[i][j]; k_params.h_view(j,i) = k_params.h_view(i,j); diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index 7ff536f8dd..038d95394a 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -591,7 +591,7 @@ void PairDPDfdtEnergyKokkos<DeviceType>::operator()(TagPairDPDfdtEnergyComputeNo // Compute uCond randnum = rand_gen.normal(); kappa_ij = STACKPARAMS?m_params[itype][jtype].kappa:params(itype,jtype).kappa; - alpha_ij = sqrt(2.0*boltz*kappa_ij); + alpha_ij = STACKPARAMS?m_params[itype][jtype].alpha:params(itype,jtype).alpha; randPair = alpha_ij*wr*randnum*dtinvsqrt; uTmp = kappa_ij*(1.0/dpdTheta[i] - 1.0/dpdTheta[j])*wd; @@ -676,6 +676,7 @@ double PairDPDfdtEnergyKokkos<DeviceType>::init_one(int i, int j) k_params.h_view(i,j).a0 = a0[i][j]; k_params.h_view(i,j).sigma = sigma[i][j]; k_params.h_view(i,j).kappa = kappa[i][j]; + k_params.h_view(i,j).alpha = alpha[i][j]; k_params.h_view(j,i) = k_params.h_view(i,j); if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) { m_params[i][j] = m_params[j][i] = k_params.h_view(i,j); diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h index 424779f839..12ffb668e2 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h @@ -88,10 +88,10 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { struct params_dpd { KOKKOS_INLINE_FUNCTION - params_dpd(){cut=0;a0=0;sigma=0;kappa=0;}; + params_dpd(){cut=0;a0=0;sigma=0;kappa=0;alpha=0;}; KOKKOS_INLINE_FUNCTION - params_dpd(int i){cut=0;a0=0;sigma=0;kappa=0;}; - F_FLOAT cut,a0,sigma,kappa; + params_dpd(int i){cut=0;a0=0;sigma=0;kappa=0;alpha=0;}; + F_FLOAT cut,a0,sigma,kappa,alpha; }; DAT::tdual_efloat_1d k_duCond,k_duMech; diff --git a/src/USER-DPD/fix_shardlow.cpp b/src/USER-DPD/fix_shardlow.cpp index 06185dee7e..7fe865c8e6 100644 --- a/src/USER-DPD/fix_shardlow.cpp +++ b/src/USER-DPD/fix_shardlow.cpp @@ -354,9 +354,8 @@ void FixShardlow::ssa_update_dpde( double *uMech = atom->uMech; double *dpdTheta = atom->dpdTheta; - double *cut_i, *cut2_i, *sigma_i, *kappa_i; + double *cut_i, *cut2_i, *sigma_i, *kappa_i, *alpha_i; double theta_ij_inv, theta_i_inv; - const double boltz2 = 2.0*force->boltz; const double boltz_inv = 1.0/force->boltz; const double ftm2v = force->ftm2v; @@ -389,6 +388,7 @@ while (ct-- > 0) { cut_i = pairDPDE->cut[itype]; sigma_i = pairDPDE->sigma[itype]; kappa_i = pairDPDE->kappa[itype]; + alpha_i = pairDPDE->alpha[itype]; theta_i_inv = 1.0/dpdTheta[i]; const double mass_i = (rmass) ? rmass[i] : mass[itype]; const double massinv_i = 1.0 / mass_i; @@ -448,7 +448,7 @@ while (ct-- > 0) { // Compute uCond double kappa_ij = kappa_i[jtype]; - double alpha_ij = sqrt(boltz2*kappa_ij); + double alpha_ij = alpha_i[jtype]; double del_uCond = alpha_ij*wr*dtsqrt * es_normal(RNGstate); del_uCond += kappa_ij*(theta_i_inv - theta_j_inv)*wdt; diff --git a/src/USER-DPD/pair_dpd_fdt_energy.cpp b/src/USER-DPD/pair_dpd_fdt_energy.cpp index d1f3cceed4..05dc52eac7 100644 --- a/src/USER-DPD/pair_dpd_fdt_energy.cpp +++ b/src/USER-DPD/pair_dpd_fdt_energy.cpp @@ -65,6 +65,7 @@ PairDPDfdtEnergy::~PairDPDfdtEnergy() memory->destroy(a0); memory->destroy(sigma); memory->destroy(kappa); + memory->destroy(alpha); memory->destroy(duCond); memory->destroy(duMech); } @@ -269,7 +270,7 @@ void PairDPDfdtEnergy::compute(int eflag, int vflag) // Compute uCond randnum = random->gaussian(); kappa_ij = kappa[itype][jtype]; - alpha_ij = sqrt(2.0*force->boltz*kappa_ij); + alpha_ij = alpha[itype][jtype]; randPair = alpha_ij*wr*randnum*dtinvsqrt; uTmp = kappa_ij*(1.0/dpdTheta[i] - 1.0/dpdTheta[j])*wd; @@ -322,6 +323,7 @@ void PairDPDfdtEnergy::allocate() memory->create(a0,n+1,n+1,"pair:a0"); memory->create(sigma,n+1,n+1,"pair:sigma"); memory->create(kappa,n+1,n+1,"pair:kappa"); + memory->create(alpha,n+1,n+1,"pair:alpha"); if (!splitFDT_flag) { memory->create(duCond,nlocal+nghost+1,"pair:duCond"); memory->create(duMech,nlocal+nghost+1,"pair:duMech"); @@ -374,11 +376,12 @@ void PairDPDfdtEnergy::coeff(int narg, char **arg) double a0_one = force->numeric(FLERR,arg[2]); double sigma_one = force->numeric(FLERR,arg[3]); double cut_one = cut_global; - double kappa_one; + double kappa_one, alpha_one; a0_is_zero = (a0_one == 0.0); // Typical use with SSA is to set a0 to zero kappa_one = force->numeric(FLERR,arg[4]); + alpha_one = sqrt(2.0*force->boltz*kappa_one); if (narg == 6) cut_one = force->numeric(FLERR,arg[5]); int count = 0; @@ -387,6 +390,7 @@ void PairDPDfdtEnergy::coeff(int narg, char **arg) a0[i][j] = a0_one; sigma[i][j] = sigma_one; kappa[i][j] = kappa_one; + alpha[i][j] = alpha_one; cut[i][j] = cut_one; setflag[i][j] = 1; count++; @@ -435,6 +439,7 @@ double PairDPDfdtEnergy::init_one(int i, int j) a0[j][i] = a0[i][j]; sigma[j][i] = sigma[i][j]; kappa[j][i] = kappa[i][j]; + alpha[j][i] = alpha[i][j]; return cut[i][j]; } @@ -488,6 +493,7 @@ void PairDPDfdtEnergy::read_restart(FILE *fp) MPI_Bcast(&sigma[i][j],1,MPI_DOUBLE,0,world); MPI_Bcast(&kappa[i][j],1,MPI_DOUBLE,0,world); MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world); + alpha[i][j] = sqrt(2.0*force->boltz*kappa[i][j]); a0_is_zero = a0_is_zero && (a0[i][j] == 0.0); // verify the zero assumption } } diff --git a/src/USER-DPD/pair_dpd_fdt_energy.h b/src/USER-DPD/pair_dpd_fdt_energy.h index dce39f83f0..e21b48f7bd 100644 --- a/src/USER-DPD/pair_dpd_fdt_energy.h +++ b/src/USER-DPD/pair_dpd_fdt_energy.h @@ -43,7 +43,7 @@ class PairDPDfdtEnergy : public Pair { double **cut; double **a0; - double **sigma,**kappa; + double **sigma,**kappa,**alpha; double *duCond,*duMech; int seed; -- GitLab