Skip to content
Snippets Groups Projects
Commit 31ed3f71 authored by Steven J. Plimpton's avatar Steven J. Plimpton
Browse files

more changes to ensure each triplet IJK is computed exactly once

parent c01fb5f7
No related branches found
No related tags found
No related merge requests found
...@@ -26,7 +26,7 @@ Created orthogonal box = (0 0 0) to (18.3252 18.3252 18.3252) ...@@ -26,7 +26,7 @@ Created orthogonal box = (0 0 0) to (18.3252 18.3252 18.3252)
1 by 1 by 1 MPI processor grid 1 by 1 by 1 MPI processor grid
create_atoms 1 box create_atoms 1 box
Created 4000 atoms Created 4000 atoms
Time spent = 0.00126314 secs Time spent = 0.00139618 secs
pair_style hybrid/overlay lj/cut 4.5 atm 4.5 2.5 pair_style hybrid/overlay lj/cut 4.5 atm 4.5 2.5
pair_coeff * * lj/cut 1.0 1.0 pair_coeff * * lj/cut 1.0 1.0
...@@ -60,26 +60,26 @@ Neighbor list info ... ...@@ -60,26 +60,26 @@ Neighbor list info ...
bin: standard bin: standard
Per MPI rank memory allocation (min/avg/max) = 11.47 | 11.47 | 11.47 Mbytes Per MPI rank memory allocation (min/avg/max) = 11.47 | 11.47 | 11.47 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
0 1.033 -4.8899813 0 -3.3408686 -4.2298176 0 1.033 -4.8404387 0 -3.291326 -4.1332095
5 1.0337853 -4.8928208 0 -3.3425304 -4.2233154 5 1.0337247 -4.8402263 0 -3.290027 -4.1207962
10 1.0358056 -4.8953304 0 -3.3420104 -4.1897183 10 1.0355935 -4.8425889 0 -3.2895869 -4.0870158
15 1.0380938 -4.8990457 0 -3.3422942 -4.1310148 15 1.0376519 -4.84599 0 -3.2899013 -4.0278711
20 1.0389566 -4.9014345 0 -3.3433892 -4.0406616 20 1.0382257 -4.8478854 0 -3.2909361 -3.9368052
25 1.0358313 -4.8989663 0 -3.3456079 -3.9093019 25 1.0347886 -4.84473 0 -3.2929351 -3.8044469
Loop time of 12.2062 on 1 procs for 25 steps with 4000 atoms Loop time of 15.95 on 1 procs for 25 steps with 4000 atoms
Performance: 353.920 tau/day, 2.048 timesteps/s Performance: 270.846 tau/day, 1.567 timesteps/s
99.9% CPU use with 1 MPI tasks x no OpenMP threads 100.0% CPU use with 1 MPI tasks x no OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 12.202 | 12.202 | 12.202 | 0.0 | 99.96 Pair | 15.946 | 15.946 | 15.946 | 0.0 | 99.97
Neigh | 0 | 0 | 0 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.0015621 | 0.0015621 | 0.0015621 | 0.0 | 0.01 Comm | 0.0015042 | 0.0015042 | 0.0015042 | 0.0 | 0.01
Output | 0.00020814 | 0.00020814 | 0.00020814 | 0.0 | 0.00 Output | 0.00013781 | 0.00013781 | 0.00013781 | 0.0 | 0.00
Modify | 0.0019698 | 0.0019698 | 0.0019698 | 0.0 | 0.02 Modify | 0.0017776 | 0.0017776 | 0.0017776 | 0.0 | 0.01
Other | | 0.0007734 | | | 0.01 Other | | 0.0006771 | | | 0.00
Nlocal: 4000 ave 4000 max 4000 min Nlocal: 4000 ave 4000 max 4000 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
...@@ -97,4 +97,4 @@ Dangerous builds = 0 ...@@ -97,4 +97,4 @@ Dangerous builds = 0
Please see the log.cite file for references relevant to this simulation Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:13 Total wall time: 0:00:16
...@@ -26,7 +26,7 @@ Created orthogonal box = (0 0 0) to (18.3252 18.3252 18.3252) ...@@ -26,7 +26,7 @@ Created orthogonal box = (0 0 0) to (18.3252 18.3252 18.3252)
1 by 2 by 2 MPI processor grid 1 by 2 by 2 MPI processor grid
create_atoms 1 box create_atoms 1 box
Created 4000 atoms Created 4000 atoms
Time spent = 0.000785112 secs Time spent = 0.000900984 secs
pair_style hybrid/overlay lj/cut 4.5 atm 4.5 2.5 pair_style hybrid/overlay lj/cut 4.5 atm 4.5 2.5
pair_coeff * * lj/cut 1.0 1.0 pair_coeff * * lj/cut 1.0 1.0
...@@ -60,26 +60,26 @@ Neighbor list info ... ...@@ -60,26 +60,26 @@ Neighbor list info ...
bin: standard bin: standard
Per MPI rank memory allocation (min/avg/max) = 5.532 | 5.532 | 5.532 Mbytes Per MPI rank memory allocation (min/avg/max) = 5.532 | 5.532 | 5.532 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
0 1.033 -4.8921547 0 -3.343042 -4.2340557 0 1.033 -4.8404387 0 -3.291326 -4.1332095
5 1.0337949 -4.8947881 0 -3.3444835 -4.2271456 5 1.0337247 -4.8402263 0 -3.290027 -4.1207962
10 1.0358286 -4.8973178 0 -3.3439632 -4.1935779 10 1.0355935 -4.8425889 0 -3.2895869 -4.0870158
15 1.0381322 -4.9010593 0 -3.3442503 -4.134913 15 1.0376519 -4.84599 0 -3.2899013 -4.0278711
20 1.0390107 -4.9034854 0 -3.3453589 -4.0446162 20 1.0382257 -4.8478854 0 -3.2909361 -3.9368052
25 1.0358988 -4.9010506 0 -3.3475908 -3.9133006 25 1.0347886 -4.84473 0 -3.2929351 -3.8044469
Loop time of 3.20632 on 4 procs for 25 steps with 4000 atoms Loop time of 4.34636 on 4 procs for 25 steps with 4000 atoms
Performance: 1347.340 tau/day, 7.797 timesteps/s Performance: 993.935 tau/day, 5.752 timesteps/s
100.0% CPU use with 4 MPI tasks x no OpenMP threads 99.6% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 3.1207 | 3.1553 | 3.1859 | 1.5 | 98.41 Pair | 3.9977 | 4.1036 | 4.209 | 4.9 | 94.41
Neigh | 0 | 0 | 0 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.019466 | 0.05009 | 0.084602 | 12.0 | 1.56 Comm | 0.13588 | 0.24134 | 0.34722 | 20.4 | 5.55
Output | 7.1049e-05 | 8.2076e-05 | 0.00011325 | 0.0 | 0.00 Output | 0.00013757 | 0.00015104 | 0.00016761 | 0.0 | 0.00
Modify | 0.00056338 | 0.00057292 | 0.00058413 | 0.0 | 0.02 Modify | 0.00087953 | 0.00091547 | 0.00095582 | 0.0 | 0.02
Other | | 0.0003092 | | | 0.01 Other | | 0.0003656 | | | 0.01
Nlocal: 1000 ave 1000 max 1000 min Nlocal: 1000 ave 1000 max 1000 min
Histogram: 4 0 0 0 0 0 0 0 0 0 Histogram: 4 0 0 0 0 0 0 0 0 0
...@@ -97,4 +97,4 @@ Dangerous builds = 0 ...@@ -97,4 +97,4 @@ Dangerous builds = 0
Please see the log.cite file for references relevant to this simulation Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:03 Total wall time: 0:00:04
...@@ -98,9 +98,15 @@ void PairATM::compute(int eflag, int vflag) ...@@ -98,9 +98,15 @@ void PairATM::compute(int eflag, int vflag)
numneigh = list->numneigh; numneigh = list->numneigh;
firstneigh = list->firstneigh; firstneigh = list->firstneigh;
int count1 = 0; // triple loop over local atoms and neighbors twice
int count2 = 0; // must compute each IJK triplet interaction exactly once
int count3 = 0; // by proc that owns the triplet atom with smallest x coord
// special logic to break ties if multiple atoms have same x or y coords
// inner two loops for jj=1,Jnum and kk=jj+1,Jnum ensure
// the pair of other 2 non-minimum-x atoms is only considered once
// triplet geometry criteria for calculation:
// each pair distance <= cutoff
// product of 3 pair distances <= cutoff_triple^3
for (ii = 0; ii < inum; ii++) { for (ii = 0; ii < inum; ii++) {
i = ilist[ii]; i = ilist[ii];
...@@ -112,12 +118,10 @@ void PairATM::compute(int eflag, int vflag) ...@@ -112,12 +118,10 @@ void PairATM::compute(int eflag, int vflag)
jnum = numneigh[i]; jnum = numneigh[i];
jnumm1 = jnum - 1; jnumm1 = jnum - 1;
// for (jj = 0; jj < jnumm1; jj++) { for (jj = 0; jj < jnumm1; jj++) {
// replace with this line:
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj]; j = jlist[jj];
j &= NEIGHMASK; j &= NEIGHMASK;
rij[0] = x[j][0] - xi; rij[0] = x[j][0] - xi;
if (rij[0] < 0.0) continue; if (rij[0] < 0.0) continue;
rij[1] = x[j][1] - yi; rij[1] = x[j][1] - yi;
...@@ -125,40 +129,33 @@ void PairATM::compute(int eflag, int vflag) ...@@ -125,40 +129,33 @@ void PairATM::compute(int eflag, int vflag)
rij[2] = x[j][2] - zi; rij[2] = x[j][2] - zi;
if (rij[0] == 0.0 and rij[1] == 0.0 and rij[2] < 0.0) continue; if (rij[0] == 0.0 and rij[1] == 0.0 and rij[2] < 0.0) continue;
rij2 = rij[0]*rij[0] + rij[1]*rij[1] + rij[2]*rij[2]; rij2 = rij[0]*rij[0] + rij[1]*rij[1] + rij[2]*rij[2];
count1++;
if (rij2 > cutoff_squared) continue; if (rij2 > cutoff_squared) continue;
count2++;
//for (kk = jj+1; kk < jnum; kk++) {
// replace with these two lines:
for (kk = 0; kk < jnum; kk++) {
if (kk == jj) continue;
for (kk = jj+1; kk < jnum; kk++) {
k = jlist[kk]; k = jlist[kk];
k &= NEIGHMASK; k &= NEIGHMASK;
rjk[0] = x[k][0] - x[j][0];
if (rjk[0] < 0.0) continue;
rjk[1] = x[k][1] - x[j][1];
if (rjk[0] == 0.0 and rjk[1] < 0.0) continue;
rjk[2] = x[k][2] - x[j][2];
if (rjk[0] == 0.0 and rjk[1] == 0.0 and rjk[2] < 0.0) continue;
rjk2 = rjk[0]*rjk[0] + rjk[1]*rjk[1] + rjk[2]*rjk[2];
if (rjk2 > cutoff_squared) continue;
rik[0] = x[k][0] - xi; rik[0] = x[k][0] - xi;
if (rik[0] < 0.0) continue;
rik[1] = x[k][1] - yi; rik[1] = x[k][1] - yi;
if (rik[0] == 0.0 and rik[1] < 0.0) continue;
rik[2] = x[k][2] - zi; rik[2] = x[k][2] - zi;
if (rik[0] == 0.0 and rik[1] == 0.0 and rik[2] < 0.0) continue;
rik2 = rik[0]*rik[0] + rik[1]*rik[1] + rik[2]*rik[2]; rik2 = rik[0]*rik[0] + rik[1]*rik[1] + rik[2]*rik[2];
if (rik2 > cutoff_squared) continue; if (rik2 > cutoff_squared) continue;
rjk[0] = x[k][0] - x[j][0];
rjk[1] = x[k][1] - x[j][1];
rjk[2] = x[k][2] - x[j][2];
rjk2 = rjk[0]*rjk[0] + rjk[1]*rjk[1] + rjk[2]*rjk[2];
if (rjk2 > cutoff_squared) continue;
double r6 = rij2*rjk2*rik2; double r6 = rij2*rjk2*rik2;
if (r6 > cutoff_triple_sixth) continue; if (r6 > cutoff_triple_sixth) continue;
nu_local = nu[type[i]][type[j]][type[k]]; nu_local = nu[type[i]][type[j]][type[k]];
if (nu_local == 0.0) continue; if (nu_local == 0.0) continue;
count3++;
interaction_ddd(nu_local, interaction_ddd(nu_local,
r6,rij2,rik2,rjk2,rij,rik,rjk,fj,fk,eflag,evdwl); r6,rij2,rik2,rjk2,rij,rik,rjk,fj,fk,eflag,evdwl);
...@@ -177,15 +174,6 @@ void PairATM::compute(int eflag, int vflag) ...@@ -177,15 +174,6 @@ void PairATM::compute(int eflag, int vflag)
} }
} }
int count = count1;
MPI_Allreduce(&count,&count1,1,MPI_INT,MPI_SUM,world);
count = count2;
MPI_Allreduce(&count,&count2,1,MPI_INT,MPI_SUM,world);
count = count3;
MPI_Allreduce(&count,&count3,1,MPI_INT,MPI_SUM,world);
printf("FORCE %g %d %d %d\n",cutoff_squared,count1,count2,count3);
if (vflag_fdotr) virial_fdotr_compute(); if (vflag_fdotr) virial_fdotr_compute();
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment