From f9c7fa973b29f17bacc2d5dfd7686f08fd529f8d Mon Sep 17 00:00:00 2001
From: Tim Mattox <timothy.mattox@engilitycorp.com>
Date: Wed, 18 Jul 2018 10:41:57 -0500
Subject: [PATCH] USER-DPD: propagate a minor performance bugfix throughout the
 DPDE code

The fix_shardlow_kokkos.cpp code had already factored out a redundant
sqrt() calculation in the innermost loop of ssa_update_dpde().  This
changeset propagates an equivilent optimization to:
  fix_shardlow.cpp
  pair_dpd_fdt_energy.cpp
  pair_dpd_fdt_energy_kokkos.cpp
The alpha_ij variable was really just an [itype][jtype] lookup parameter,
replacing a sqrt() and two multiplies per interacting particle pair
by a cached memory read.  Even if there isn't much time savings, the
code is now consistent across the various versions.
---
 src/KOKKOS/fix_shardlow_kokkos.cpp        |  3 +--
 src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp |  3 ++-
 src/KOKKOS/pair_dpd_fdt_energy_kokkos.h   |  6 +++---
 src/USER-DPD/fix_shardlow.cpp             |  6 +++---
 src/USER-DPD/pair_dpd_fdt_energy.cpp      | 10 ++++++++--
 src/USER-DPD/pair_dpd_fdt_energy.h        |  2 +-
 6 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp
index 70055bf8c9..99e51ebe38 100644
--- a/src/KOKKOS/fix_shardlow_kokkos.cpp
+++ b/src/KOKKOS/fix_shardlow_kokkos.cpp
@@ -157,7 +157,6 @@ void FixShardlowKokkos<DeviceType>::init()
   k_pairDPDE->k_cutsq.template sync<DeviceType>();
   d_cutsq = k_pairDPDE->k_cutsq.template view<DeviceType>();
 
-  const double boltz2 = 2.0*force->boltz;
   for (int i = 1; i <= ntypes; i++) {
     for (int j = i; j <= ntypes; j++) {
       F_FLOAT cutone = k_pairDPDE->cut[i][j];
@@ -165,7 +164,7 @@ void FixShardlowKokkos<DeviceType>::init()
       else k_params.h_view(i,j).cutinv = FLT_MAX;
       k_params.h_view(i,j).halfsigma = 0.5*k_pairDPDE->sigma[i][j];
       k_params.h_view(i,j).kappa = k_pairDPDE->kappa[i][j];
-      k_params.h_view(i,j).alpha = sqrt(boltz2*k_pairDPDE->kappa[i][j]);
+      k_params.h_view(i,j).alpha = k_pairDPDE->alpha[i][j];
 
       k_params.h_view(j,i) = k_params.h_view(i,j);
 
diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp
index 7ff536f8dd..038d95394a 100644
--- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp
+++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp
@@ -591,7 +591,7 @@ void PairDPDfdtEnergyKokkos<DeviceType>::operator()(TagPairDPDfdtEnergyComputeNo
       // Compute uCond
       randnum = rand_gen.normal();
       kappa_ij = STACKPARAMS?m_params[itype][jtype].kappa:params(itype,jtype).kappa;
-      alpha_ij = sqrt(2.0*boltz*kappa_ij);
+      alpha_ij = STACKPARAMS?m_params[itype][jtype].alpha:params(itype,jtype).alpha;
       randPair = alpha_ij*wr*randnum*dtinvsqrt;
 
       uTmp = kappa_ij*(1.0/dpdTheta[i] - 1.0/dpdTheta[j])*wd;
@@ -676,6 +676,7 @@ double PairDPDfdtEnergyKokkos<DeviceType>::init_one(int i, int j)
   k_params.h_view(i,j).a0 = a0[i][j];
   k_params.h_view(i,j).sigma = sigma[i][j];
   k_params.h_view(i,j).kappa = kappa[i][j];
+  k_params.h_view(i,j).alpha = alpha[i][j];
   k_params.h_view(j,i) = k_params.h_view(i,j);
   if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
     m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h
index 424779f839..12ffb668e2 100644
--- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h
+++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h
@@ -88,10 +88,10 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy {
 
   struct params_dpd {
     KOKKOS_INLINE_FUNCTION
-    params_dpd(){cut=0;a0=0;sigma=0;kappa=0;};
+    params_dpd(){cut=0;a0=0;sigma=0;kappa=0;alpha=0;};
     KOKKOS_INLINE_FUNCTION
-    params_dpd(int i){cut=0;a0=0;sigma=0;kappa=0;};
-    F_FLOAT cut,a0,sigma,kappa;
+    params_dpd(int i){cut=0;a0=0;sigma=0;kappa=0;alpha=0;};
+    F_FLOAT cut,a0,sigma,kappa,alpha;
   };
 
   DAT::tdual_efloat_1d k_duCond,k_duMech;
diff --git a/src/USER-DPD/fix_shardlow.cpp b/src/USER-DPD/fix_shardlow.cpp
index 06185dee7e..7fe865c8e6 100644
--- a/src/USER-DPD/fix_shardlow.cpp
+++ b/src/USER-DPD/fix_shardlow.cpp
@@ -354,9 +354,8 @@ void FixShardlow::ssa_update_dpde(
   double *uMech = atom->uMech;
   double *dpdTheta = atom->dpdTheta;
 
-  double *cut_i, *cut2_i, *sigma_i, *kappa_i;
+  double *cut_i, *cut2_i, *sigma_i, *kappa_i, *alpha_i;
   double theta_ij_inv, theta_i_inv;
-  const double boltz2 = 2.0*force->boltz;
   const double boltz_inv = 1.0/force->boltz;
   const double ftm2v = force->ftm2v;
 
@@ -389,6 +388,7 @@ while (ct-- > 0) {
   cut_i  = pairDPDE->cut[itype];
   sigma_i = pairDPDE->sigma[itype];
   kappa_i = pairDPDE->kappa[itype];
+  alpha_i = pairDPDE->alpha[itype];
   theta_i_inv = 1.0/dpdTheta[i];
   const double mass_i = (rmass) ? rmass[i] : mass[itype];
   const double massinv_i = 1.0 / mass_i;
@@ -448,7 +448,7 @@ while (ct-- > 0) {
 
       // Compute uCond
       double kappa_ij = kappa_i[jtype];
-      double alpha_ij = sqrt(boltz2*kappa_ij);
+      double alpha_ij = alpha_i[jtype];
       double del_uCond = alpha_ij*wr*dtsqrt * es_normal(RNGstate);
 
       del_uCond += kappa_ij*(theta_i_inv - theta_j_inv)*wdt;
diff --git a/src/USER-DPD/pair_dpd_fdt_energy.cpp b/src/USER-DPD/pair_dpd_fdt_energy.cpp
index d1f3cceed4..05dc52eac7 100644
--- a/src/USER-DPD/pair_dpd_fdt_energy.cpp
+++ b/src/USER-DPD/pair_dpd_fdt_energy.cpp
@@ -65,6 +65,7 @@ PairDPDfdtEnergy::~PairDPDfdtEnergy()
     memory->destroy(a0);
     memory->destroy(sigma);
     memory->destroy(kappa);
+    memory->destroy(alpha);
     memory->destroy(duCond);
     memory->destroy(duMech);
   }
@@ -269,7 +270,7 @@ void PairDPDfdtEnergy::compute(int eflag, int vflag)
           // Compute uCond
           randnum = random->gaussian();
           kappa_ij = kappa[itype][jtype];
-          alpha_ij = sqrt(2.0*force->boltz*kappa_ij);
+          alpha_ij = alpha[itype][jtype];
           randPair = alpha_ij*wr*randnum*dtinvsqrt;
 
           uTmp = kappa_ij*(1.0/dpdTheta[i] - 1.0/dpdTheta[j])*wd;
@@ -322,6 +323,7 @@ void PairDPDfdtEnergy::allocate()
   memory->create(a0,n+1,n+1,"pair:a0");
   memory->create(sigma,n+1,n+1,"pair:sigma");
   memory->create(kappa,n+1,n+1,"pair:kappa");
+  memory->create(alpha,n+1,n+1,"pair:alpha");
   if (!splitFDT_flag) {
     memory->create(duCond,nlocal+nghost+1,"pair:duCond");
     memory->create(duMech,nlocal+nghost+1,"pair:duMech");
@@ -374,11 +376,12 @@ void PairDPDfdtEnergy::coeff(int narg, char **arg)
   double a0_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
   double cut_one = cut_global;
-  double kappa_one;
+  double kappa_one, alpha_one;
 
   a0_is_zero = (a0_one == 0.0); // Typical use with SSA is to set a0 to zero
 
   kappa_one = force->numeric(FLERR,arg[4]);
+  alpha_one = sqrt(2.0*force->boltz*kappa_one);
   if (narg == 6) cut_one = force->numeric(FLERR,arg[5]);
 
   int count = 0;
@@ -387,6 +390,7 @@ void PairDPDfdtEnergy::coeff(int narg, char **arg)
       a0[i][j] = a0_one;
       sigma[i][j] = sigma_one;
       kappa[i][j] = kappa_one;
+      alpha[i][j] = alpha_one;
       cut[i][j] = cut_one;
       setflag[i][j] = 1;
       count++;
@@ -435,6 +439,7 @@ double PairDPDfdtEnergy::init_one(int i, int j)
   a0[j][i] = a0[i][j];
   sigma[j][i] = sigma[i][j];
   kappa[j][i] = kappa[i][j];
+  alpha[j][i] = alpha[i][j];
 
   return cut[i][j];
 }
@@ -488,6 +493,7 @@ void PairDPDfdtEnergy::read_restart(FILE *fp)
         MPI_Bcast(&sigma[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&kappa[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world);
+        alpha[i][j] = sqrt(2.0*force->boltz*kappa[i][j]);
         a0_is_zero = a0_is_zero && (a0[i][j] == 0.0); // verify the zero assumption
       }
     }
diff --git a/src/USER-DPD/pair_dpd_fdt_energy.h b/src/USER-DPD/pair_dpd_fdt_energy.h
index dce39f83f0..e21b48f7bd 100644
--- a/src/USER-DPD/pair_dpd_fdt_energy.h
+++ b/src/USER-DPD/pair_dpd_fdt_energy.h
@@ -43,7 +43,7 @@ class PairDPDfdtEnergy : public Pair {
 
   double **cut;
   double **a0;
-  double **sigma,**kappa;
+  double **sigma,**kappa,**alpha;
   double *duCond,*duMech;
 
   int seed;
-- 
GitLab