From e337db40593ce56685be1dfdc20fd513680b65d5 Mon Sep 17 00:00:00 2001 From: Stan Moore <stamoor@sandia.gov> Date: Mon, 6 Nov 2017 11:31:43 -0700 Subject: [PATCH] Replicate bbox from Chris Knight --- doc/src/replicate.txt | 12 +- src/replicate.cpp | 480 +++++++++++++++++++++++++++++++++++------- 2 files changed, 417 insertions(+), 75 deletions(-) diff --git a/doc/src/replicate.txt b/doc/src/replicate.txt index 291558e0e5..08523ecdd8 100644 --- a/doc/src/replicate.txt +++ b/doc/src/replicate.txt @@ -10,9 +10,11 @@ replicate command :h3 [Syntax:] -replicate nx ny nz :pre +replicate nx ny nz {keyword} :pre -nx,ny,nz = replication factors in each dimension :ul +nx,ny,nz = replication factors in each dimension :ulb +optional {keyword} = {bbox} :l + {bbox} = only check atoms in replicas that overlap with a processor's subdomain :ule [Examples:] @@ -43,6 +45,12 @@ file that crosses a periodic boundary should be between two atoms with image flags that differ by 1. This will allow the bond to be unwrapped appropriately. +The optional keyword {bbox} uses a bounding box to only check atoms +in replicas that overlap with a processor's subdomain when assigning +atoms to processors, and thus can result in substantial speedups for +calculations using a large number of processors. It does require +temporarily using more memory. + [Restrictions:] A 2d simulation cannot be replicated in the z dimension. diff --git a/src/replicate.cpp b/src/replicate.cpp index f3d1964169..1251688211 100644 --- a/src/replicate.cpp +++ b/src/replicate.cpp @@ -44,7 +44,7 @@ void Replicate::command(int narg, char **arg) if (domain->box_exist == 0) error->all(FLERR,"Replicate command before simulation box is defined"); - if (narg != 3) error->all(FLERR,"Illegal replicate command"); + if (narg < 3 || narg > 4) error->all(FLERR,"Illegal replicate command"); int me = comm->me; int nprocs = comm->nprocs; @@ -58,6 +58,10 @@ void Replicate::command(int narg, char **arg) int nz = force->inumeric(FLERR,arg[2]); int nrep = nx*ny*nz; + int bbox_flag = 0; + if (narg == 4) + if (strcmp(arg[3],"bbox") == 0) bbox_flag = 1; + // error and warning checks if (nx <= 0 || ny <= 0 || nz <= 0) @@ -99,6 +103,37 @@ void Replicate::command(int narg, char **arg) maxmol = maxmol_all; } + // check image flags maximum extent; only efficient small image flags compared to new system + + int _imagelo[3], _imagehi[3]; + _imagelo[0] = 0; + _imagelo[1] = 0; + _imagelo[2] = 0; + _imagehi[0] = 0; + _imagehi[1] = 0; + _imagehi[2] = 0; + + if (bbox_flag) { + + for (i=0; i<atom->nlocal; ++i) { + imageint image = atom->image[i]; + int xbox = (image & IMGMASK) - IMGMAX; + int ybox = (image >> IMGBITS & IMGMASK) - IMGMAX; + int zbox = (image >> IMG2BITS) - IMGMAX; + + if (xbox < _imagelo[0]) _imagelo[0] = xbox; + if (ybox < _imagelo[1]) _imagelo[1] = ybox; + if (zbox < _imagelo[2]) _imagelo[2] = zbox; + + if (xbox > _imagehi[0]) _imagehi[0] = xbox; + if (ybox > _imagehi[1]) _imagehi[1] = ybox; + if (zbox > _imagehi[2]) _imagehi[2] = zbox; + } + + MPI_Allreduce(MPI_IN_PLACE, &(_imagelo[0]), 3, MPI_INT, MPI_MIN, world); + MPI_Allreduce(MPI_IN_PLACE, &(_imagehi[0]), 3, MPI_INT, MPI_MAX, world); + } + // unmap existing atoms via image flags for (i = 0; i < atom->nlocal; i++) @@ -280,93 +315,392 @@ void Replicate::command(int narg, char **arg) double *coord; int tag_enable = atom->tag_enable; - for (int iproc = 0; iproc < nprocs; iproc++) { - if (me == iproc) { - n = 0; - for (i = 0; i < old->nlocal; i++) n += old_avec->pack_restart(i,&buf[n]); + if (bbox_flag) { + + // allgather size of buf on each proc + + n = 0; + for (i = 0; i < old->nlocal; i++) n += old_avec->pack_restart(i,&buf[n]); + + int * size_buf_rnk; + memory->create(size_buf_rnk, nprocs, "replicate:size_buf_rnk"); + + MPI_Allgather(&n, 1, MPI_INT, size_buf_rnk, 1, MPI_INT, world); + + // size of buf_all + + int size_buf_all = 0; + MPI_Allreduce(&n, &size_buf_all, 1, MPI_INT, MPI_SUM, world); + + if (me == 0 && screen) { + fprintf(screen," bounding box image = (%i %i %i) to (%i %i %i)\n", + _imagelo[0],_imagelo[1],_imagelo[2],_imagehi[0],_imagehi[1],_imagehi[2]); + fprintf(screen," bounding box extra memory = %.2f MB\n", + (double)size_buf_all*sizeof(double)/1024/1024); } - MPI_Bcast(&n,1,MPI_INT,iproc,world); - MPI_Bcast(buf,n,MPI_DOUBLE,iproc,world); + + // rnk offsets + + int * disp_buf_rnk; + memory->create(disp_buf_rnk, nprocs, "replicate:disp_buf_rnk"); + disp_buf_rnk[0] = 0; + for (i=1; i<nprocs; ++i) disp_buf_rnk[i] = disp_buf_rnk[i-1] + size_buf_rnk[i-1]; + + // allgather buf_all + + double * buf_all; + memory->create(buf_all, size_buf_all, "replicate:buf_all"); + + MPI_Allgatherv(buf, n, MPI_DOUBLE, buf_all, size_buf_rnk, disp_buf_rnk, MPI_DOUBLE, world); + + // bounding box of original unwrapped system + + double _orig_lo[3], _orig_hi[3]; + if (triclinic) { + _orig_lo[0] = domain->boxlo[0] + _imagelo[0] * old_xprd + _imagelo[1] * old_xy + _imagelo[2] * old_xz; + _orig_lo[1] = domain->boxlo[1] + _imagelo[1] * old_yprd + _imagelo[2] * old_yz; + _orig_lo[2] = domain->boxlo[2] + _imagelo[2] * old_zprd; + + _orig_hi[0] = domain->boxlo[0] + (_imagehi[0]+1) * old_xprd + (_imagehi[1]+1) * old_xy + (_imagehi[2]+1) * old_xz; + _orig_hi[1] = domain->boxlo[1] + (_imagehi[1]+1) * old_yprd + (_imagehi[2]+1) * old_yz; + _orig_hi[2] = domain->boxlo[2] + (_imagehi[2]+1) * old_zprd; + } else { + _orig_lo[0] = domain->boxlo[0] + _imagelo[0] * old_xprd; + _orig_lo[1] = domain->boxlo[1] + _imagelo[1] * old_yprd; + _orig_lo[2] = domain->boxlo[2] + _imagelo[2] * old_zprd; + + _orig_hi[0] = domain->boxlo[0] + (_imagehi[0]+1) * old_xprd; + _orig_hi[1] = domain->boxlo[1] + (_imagehi[1]+1) * old_yprd; + _orig_hi[2] = domain->boxlo[2] + (_imagehi[2]+1) * old_zprd; + } + + double _lo[3], _hi[3]; + + int num_replicas_added = 0; for (ix = 0; ix < nx; ix++) { for (iy = 0; iy < ny; iy++) { for (iz = 0; iz < nz; iz++) { - // while loop over one proc's atom list + // domain->remap() overwrites coordinates, so always recompute here + + if (triclinic) { + _lo[0] = _orig_lo[0] + ix * old_xprd + iy * old_xy + iz * old_xz; + _hi[0] = _orig_hi[0] + ix * old_xprd + iy * old_xy + iz * old_xz; + + _lo[1] = _orig_lo[1] + iy * old_yprd + iz * old_yz; + _hi[1] = _orig_hi[1] + iy * old_yprd + iz * old_yz; + + _lo[2] = _orig_lo[2] + iz * old_zprd; + _hi[2] = _orig_hi[2] + iz * old_zprd; + } else { + _lo[0] = _orig_lo[0] + ix * old_xprd; + _hi[0] = _orig_hi[0] + ix * old_xprd; + + _lo[1] = _orig_lo[1] + iy * old_yprd; + _hi[1] = _orig_hi[1] + iy * old_yprd; - m = 0; - while (m < n) { - image = ((imageint) IMGMAX << IMG2BITS) | + _lo[2] = _orig_lo[2] + iz * old_zprd; + _hi[2] = _orig_hi[2] + iz * old_zprd; + } + + // test if bounding box of shifted replica overlaps sub-domain of proc + // if not, then skip testing atoms + + int xoverlap = 1; + int yoverlap = 1; + int zoverlap = 1; + if (triclinic) { + double _llo[3]; + domain->x2lamda(_lo,_llo); + double _lhi[3]; + domain->x2lamda(_hi,_lhi); + + if (_llo[0] > (subhi[0] - EPSILON) + || _lhi[0] < (sublo[0] + EPSILON) ) xoverlap = 0; + if (_llo[1] > (subhi[1] - EPSILON) + || _lhi[1] < (sublo[1] + EPSILON) ) yoverlap = 0; + if (_llo[2] > (subhi[2] - EPSILON) + || _lhi[2] < (sublo[2] + EPSILON) ) zoverlap = 0; + } else { + if (_lo[0] > (subhi[0] - EPSILON) + || _hi[0] < (sublo[0] + EPSILON) ) xoverlap = 0; + if (_lo[1] > (subhi[1] - EPSILON) + || _hi[1] < (sublo[1] + EPSILON) ) yoverlap = 0; + if (_lo[2] > (subhi[2] - EPSILON) + || _hi[2] < (sublo[2] + EPSILON) ) zoverlap = 0; + } + + int overlap = 0; + if (xoverlap && yoverlap && zoverlap) overlap = 1; + + // if no overlap, test if bounding box wrapped back into new system + + if (!overlap) { + + // wrap back into cell + + imageint imagelo = ((imageint) IMGMAX << IMG2BITS) | ((imageint) IMGMAX << IMGBITS) | IMGMAX; - if (triclinic == 0) { - x[0] = buf[m+1] + ix*old_xprd; - x[1] = buf[m+2] + iy*old_yprd; - x[2] = buf[m+3] + iz*old_zprd; - } else { - x[0] = buf[m+1] + ix*old_xprd + iy*old_xy + iz*old_xz; - x[1] = buf[m+2] + iy*old_yprd + iz*old_yz; - x[2] = buf[m+3] + iz*old_zprd; - } - domain->remap(x,image); + domain->remap(&(_lo[0]), imagelo); + int xboxlo = (imagelo & IMGMASK) - IMGMAX; + int yboxlo = (imagelo >> IMGBITS & IMGMASK) - IMGMAX; + int zboxlo = (imagelo >> IMG2BITS) - IMGMAX; + + imageint imagehi = ((imageint) IMGMAX << IMG2BITS) | + ((imageint) IMGMAX << IMGBITS) | IMGMAX; + domain->remap(&(_hi[0]), imagehi); + int xboxhi = (imagehi & IMGMASK) - IMGMAX; + int yboxhi = (imagehi >> IMGBITS & IMGMASK) - IMGMAX; + int zboxhi = (imagehi >> IMG2BITS) - IMGMAX; + if (triclinic) { - domain->x2lamda(x,lamda); - coord = lamda; - } else coord = x; - - if (coord[0] >= sublo[0] && coord[0] < subhi[0] && - coord[1] >= sublo[1] && coord[1] < subhi[1] && - coord[2] >= sublo[2] && coord[2] < subhi[2]) { - - m += avec->unpack_restart(&buf[m]); - - i = atom->nlocal - 1; - if (tag_enable) - atom_offset = iz*ny*nx*maxtag + iy*nx*maxtag + ix*maxtag; - else atom_offset = 0; - mol_offset = iz*ny*nx*maxmol + iy*nx*maxmol + ix*maxmol; - - atom->x[i][0] = x[0]; - atom->x[i][1] = x[1]; - atom->x[i][2] = x[2]; - - atom->tag[i] += atom_offset; - atom->image[i] = image; - - if (atom->molecular) { - if (atom->molecule[i] > 0) - atom->molecule[i] += mol_offset; - if (atom->molecular == 1) { - if (atom->avec->bonds_allow) - for (j = 0; j < atom->num_bond[i]; j++) - atom->bond_atom[i][j] += atom_offset; - if (atom->avec->angles_allow) - for (j = 0; j < atom->num_angle[i]; j++) { - atom->angle_atom1[i][j] += atom_offset; - atom->angle_atom2[i][j] += atom_offset; - atom->angle_atom3[i][j] += atom_offset; - } - if (atom->avec->dihedrals_allow) - for (j = 0; j < atom->num_dihedral[i]; j++) { - atom->dihedral_atom1[i][j] += atom_offset; - atom->dihedral_atom2[i][j] += atom_offset; - atom->dihedral_atom3[i][j] += atom_offset; - atom->dihedral_atom4[i][j] += atom_offset; - } - if (atom->avec->impropers_allow) - for (j = 0; j < atom->num_improper[i]; j++) { - atom->improper_atom1[i][j] += atom_offset; - atom->improper_atom2[i][j] += atom_offset; - atom->improper_atom3[i][j] += atom_offset; - atom->improper_atom4[i][j] += atom_offset; - } + double _llo[3]; + _llo[0] = _lo[0]; _llo[1] = _lo[1]; _llo[2] = _lo[2]; + domain->x2lamda(_llo,_lo); + + double _lhi[3]; + _lhi[0] = _hi[0]; _lhi[1] = _hi[1]; _lhi[2] = _hi[2]; + domain->x2lamda(_lhi,_hi); + } + + // test all fragments for any overlap; ok to include false positives + + int _xoverlap1 = 0; + int _xoverlap2 = 0; + if (!xoverlap) { + if (xboxlo < 0) { + _xoverlap1 = 1; + if ( _lo[0] > (subhi[0] - EPSILON) ) _xoverlap1 = 0; + } + + if (xboxhi > 0) { + _xoverlap2 = 1; + if ( _hi[0] < (sublo[0] + EPSILON) ) _xoverlap2 = 0; + } + + if (_xoverlap1 || _xoverlap2) xoverlap = 1; + } + + int _yoverlap1 = 0; + int _yoverlap2 = 0; + if (!yoverlap) { + if (yboxlo < 0) { + _yoverlap1 = 1; + if ( _lo[1] > (subhi[1] - EPSILON) ) _yoverlap1 = 0; + } + + if (yboxhi > 0) { + _yoverlap2 = 1; + if ( _hi[1] < (sublo[1] + EPSILON) ) _yoverlap2 = 0; + } + + if (_yoverlap1 || _yoverlap2) yoverlap = 1; + } + + + int _zoverlap1 = 0; + int _zoverlap2 = 0; + if (!zoverlap) { + if (zboxlo < 0) { + _zoverlap1 = 1; + if ( _lo[2] > (subhi[2] - EPSILON) ) _zoverlap1 = 0; + } + + if (zboxhi > 0) { + _zoverlap2 = 1; + if ( _hi[2] < (sublo[2] + EPSILON) ) _zoverlap2 = 0; + } + + if (_zoverlap1 || _zoverlap2) zoverlap = 1; + } + + // does either fragment overlap w/ sub-domain + + if (xoverlap && yoverlap && zoverlap) overlap = 1; + } + + // while loop over one proc's atom list + + if (overlap) { + num_replicas_added++; + + m = 0; + while (m < size_buf_all) { + image = ((imageint) IMGMAX << IMG2BITS) | + ((imageint) IMGMAX << IMGBITS) | IMGMAX; + if (triclinic == 0) { + x[0] = buf_all[m+1] + ix*old_xprd; + x[1] = buf_all[m+2] + iy*old_yprd; + x[2] = buf_all[m+3] + iz*old_zprd; + } else { + x[0] = buf_all[m+1] + ix*old_xprd + iy*old_xy + iz*old_xz; + x[1] = buf_all[m+2] + iy*old_yprd + iz*old_yz; + x[2] = buf_all[m+3] + iz*old_zprd; + } + domain->remap(x,image); + if (triclinic) { + domain->x2lamda(x,lamda); + coord = lamda; + } else coord = x; + + if (coord[0] >= sublo[0] && coord[0] < subhi[0] && + coord[1] >= sublo[1] && coord[1] < subhi[1] && + coord[2] >= sublo[2] && coord[2] < subhi[2]) { + + m += avec->unpack_restart(&buf_all[m]); + + i = atom->nlocal - 1; + if (tag_enable) + atom_offset = iz*ny*nx*maxtag + iy*nx*maxtag + ix*maxtag; + else atom_offset = 0; + mol_offset = iz*ny*nx*maxmol + iy*nx*maxmol + ix*maxmol; + + atom->x[i][0] = x[0]; + atom->x[i][1] = x[1]; + atom->x[i][2] = x[2]; + + atom->tag[i] += atom_offset; + atom->image[i] = image; + + if (atom->molecular) { + if (atom->molecule[i] > 0) + atom->molecule[i] += mol_offset; + if (atom->molecular == 1) { + if (atom->avec->bonds_allow) + for (j = 0; j < atom->num_bond[i]; j++) + atom->bond_atom[i][j] += atom_offset; + if (atom->avec->angles_allow) + for (j = 0; j < atom->num_angle[i]; j++) { + atom->angle_atom1[i][j] += atom_offset; + atom->angle_atom2[i][j] += atom_offset; + atom->angle_atom3[i][j] += atom_offset; + } + if (atom->avec->dihedrals_allow) + for (j = 0; j < atom->num_dihedral[i]; j++) { + atom->dihedral_atom1[i][j] += atom_offset; + atom->dihedral_atom2[i][j] += atom_offset; + atom->dihedral_atom3[i][j] += atom_offset; + atom->dihedral_atom4[i][j] += atom_offset; + } + if (atom->avec->impropers_allow) + for (j = 0; j < atom->num_improper[i]; j++) { + atom->improper_atom1[i][j] += atom_offset; + atom->improper_atom2[i][j] += atom_offset; + atom->improper_atom3[i][j] += atom_offset; + atom->improper_atom4[i][j] += atom_offset; + } + } } + } else m += static_cast<int> (buf_all[m]); + } + } // if (overlap) + + } + } + } + + memory->destroy(size_buf_rnk); + memory->destroy(disp_buf_rnk); + memory->destroy(buf_all); + + int sum = 0; + MPI_Reduce(&num_replicas_added, &sum, 1, MPI_INT, MPI_SUM, 0, world); + double avg = (double) sum / nprocs; + if (me == 0 && screen) + fprintf(screen," average # of replicas added to proc = %.2f out of %i (%.2f %%)\n", + avg,nx*ny*nz,avg/(nx*ny*nz)*100.0); + + } else { + + for (int iproc = 0; iproc < nprocs; iproc++) { + if (me == iproc) { + n = 0; + for (i = 0; i < old->nlocal; i++) n += old_avec->pack_restart(i,&buf[n]); + } + MPI_Bcast(&n,1,MPI_INT,iproc,world); + MPI_Bcast(buf,n,MPI_DOUBLE,iproc,world); + + for (ix = 0; ix < nx; ix++) { + for (iy = 0; iy < ny; iy++) { + for (iz = 0; iz < nz; iz++) { + + // while loop over one proc's atom list + + m = 0; + while (m < n) { + image = ((imageint) IMGMAX << IMG2BITS) | + ((imageint) IMGMAX << IMGBITS) | IMGMAX; + if (triclinic == 0) { + x[0] = buf[m+1] + ix*old_xprd; + x[1] = buf[m+2] + iy*old_yprd; + x[2] = buf[m+3] + iz*old_zprd; + } else { + x[0] = buf[m+1] + ix*old_xprd + iy*old_xy + iz*old_xz; + x[1] = buf[m+2] + iy*old_yprd + iz*old_yz; + x[2] = buf[m+3] + iz*old_zprd; } - } else m += static_cast<int> (buf[m]); + domain->remap(x,image); + if (triclinic) { + domain->x2lamda(x,lamda); + coord = lamda; + } else coord = x; + + if (coord[0] >= sublo[0] && coord[0] < subhi[0] && + coord[1] >= sublo[1] && coord[1] < subhi[1] && + coord[2] >= sublo[2] && coord[2] < subhi[2]) { + + m += avec->unpack_restart(&buf[m]); + + i = atom->nlocal - 1; + if (tag_enable) + atom_offset = iz*ny*nx*maxtag + iy*nx*maxtag + ix*maxtag; + else atom_offset = 0; + mol_offset = iz*ny*nx*maxmol + iy*nx*maxmol + ix*maxmol; + + atom->x[i][0] = x[0]; + atom->x[i][1] = x[1]; + atom->x[i][2] = x[2]; + + atom->tag[i] += atom_offset; + atom->image[i] = image; + + if (atom->molecular) { + if (atom->molecule[i] > 0) + atom->molecule[i] += mol_offset; + if (atom->molecular == 1) { + if (atom->avec->bonds_allow) + for (j = 0; j < atom->num_bond[i]; j++) + atom->bond_atom[i][j] += atom_offset; + if (atom->avec->angles_allow) + for (j = 0; j < atom->num_angle[i]; j++) { + atom->angle_atom1[i][j] += atom_offset; + atom->angle_atom2[i][j] += atom_offset; + atom->angle_atom3[i][j] += atom_offset; + } + if (atom->avec->dihedrals_allow) + for (j = 0; j < atom->num_dihedral[i]; j++) { + atom->dihedral_atom1[i][j] += atom_offset; + atom->dihedral_atom2[i][j] += atom_offset; + atom->dihedral_atom3[i][j] += atom_offset; + atom->dihedral_atom4[i][j] += atom_offset; + } + if (atom->avec->impropers_allow) + for (j = 0; j < atom->num_improper[i]; j++) { + atom->improper_atom1[i][j] += atom_offset; + atom->improper_atom2[i][j] += atom_offset; + atom->improper_atom3[i][j] += atom_offset; + atom->improper_atom4[i][j] += atom_offset; + } + } + } + } else m += static_cast<int> (buf[m]); + } } } } } - } + } // if (bbox_flag) // free communication buffer and old atom class -- GitLab