diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp index 4e810bbc2378c5bc37f9f5b59b09c675e6193cf1..34b17533ea66823d9c8cc0297e81b3abf55743bd 100644 --- a/src/KSPACE/pppm.cpp +++ b/src/KSPACE/pppm.cpp @@ -115,7 +115,8 @@ void PPPM::init() if (domain->triclinic) error->all(FLERR,"Cannot (yet) use PPPM with triclinic box"); - if (domain->dimension == 2) error->all(FLERR,"Cannot use PPPM with 2d simulation"); + if (domain->dimension == 2) error->all(FLERR, + "Cannot use PPPM with 2d simulation"); if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); @@ -186,8 +187,8 @@ void PPPM::init() // if we have a /proxy pppm version check if the pair style is compatible - if ( (strcmp(force->kspace_style,"pppm/proxy") == 0) || - (strcmp(force->kspace_style,"pppm/tip4p/proxy") == 0) ) { + if ((strcmp(force->kspace_style,"pppm/proxy") == 0) || + (strcmp(force->kspace_style,"pppm/tip4p/proxy") == 0) ) { if (force->pair == NULL) error->all(FLERR,"KSpace style is incompatible with Pair style"); if (strstr(force->pair_style,"pppm/") == NULL ) @@ -1507,7 +1508,8 @@ void PPPM::particle_map() if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || ny+nlower < nylo_out || ny+nupper > nyhi_out || - nz+nlower < nzlo_out || nz+nupper > nzhi_out) flag = 1; + nz+nlower < nzlo_out || nz+nupper > nzhi_out) + flag = 1; } if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); @@ -1527,7 +1529,8 @@ void PPPM::make_rho() // clear 3d density array - memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR)); + memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); // loop over my charges, add their contribution to nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge @@ -1784,6 +1787,7 @@ void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) charge assignment into rho1d dx,dy,dz = distance of particle from "lower left" grid point ------------------------------------------------------------------------- */ + void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, const FFT_SCALAR &dz) { diff --git a/src/KSPACE/pppm.h b/src/KSPACE/pppm.h index 1ee2cfbb4f3e408fddde72d63ac4ca19800efbe5..b3dbf1779c75a5cc3a56edeca021eb31eaa6b70c 100644 --- a/src/KSPACE/pppm.h +++ b/src/KSPACE/pppm.h @@ -1,4 +1,4 @@ -/* -*- c++ -*- ---------------------------------------------------------- +/* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov diff --git a/src/PERI/pair_peri_lps.cpp b/src/PERI/pair_peri_lps.cpp index ab747109493b430e1cef082fdcc62ba041097ea5..10002f8d45735ffb229f92fc1002b2806862434a 100644 --- a/src/PERI/pair_peri_lps.cpp +++ b/src/PERI/pair_peri_lps.cpp @@ -41,7 +41,7 @@ using namespace LAMMPS_NS; PairPeriLPS::PairPeriLPS(LAMMPS *lmp) : Pair(lmp) { for (int i = 0; i < 6; i++) virial[i] = 0.0; - no_virial_fdotr_compute=1; + no_virial_fdotr_compute = 1; ifix_peri = -1; @@ -189,7 +189,8 @@ void PairPeriLPS::compute(int eflag, int vflag) } if (eflag) evdwl = 0.5*rk*dr; - if (evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,fpair*vfrac[i],delx,dely,delz); + if (evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0, + fpair*vfrac[i],delx,dely,delz); } } } @@ -303,7 +304,8 @@ void PairPeriLPS::compute(int eflag, int vflag) if (eflag) evdwl = 0.5 * 15 * (shearmodulus[itype][itype]/wvolume[i]) * omega_plus*(deviatoric_extension * deviatoric_extension) * vfrac[j] * vfrac_scale; - if (evflag) ev_tally(i,i,nlocal,0,0.5*evdwl,0.0,0.5*fbond*vfrac[i],delx,dely,delz); + if (evflag) ev_tally(i,i,nlocal,0,0.5*evdwl,0.0, + 0.5*fbond*vfrac[i],delx,dely,delz); // find stretch in bond I-J and break if necessary // use s0 from previous timestep @@ -316,7 +318,8 @@ void PairPeriLPS::compute(int eflag, int vflag) if (first) s0_new[i] = s00[itype][jtype] - (alpha[itype][jtype] * stretch); else - s0_new[i] = MAX(s0_new[i],s00[itype][jtype] - (alpha[itype][jtype] * stretch)); + s0_new[i] = MAX(s0_new[i],s00[itype][jtype] - + (alpha[itype][jtype] * stretch)); first = false; } diff --git a/src/comm.cpp b/src/comm.cpp index 1651e0dfd7eba67f5d5824b5c9e576172ff3861d..8309382b78d6032d5aa998ad730a4c93fd515ba2 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -22,6 +22,7 @@ #include "stdio.h" #include "stdlib.h" #include "comm.h" +#include "universe.h" #include "atom.h" #include "atom_vec.h" #include "force.h" @@ -68,6 +69,7 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp) cutghostmulti = NULL; cutghostuser = 0.0; ghost_velocity = 0; + recv_from_partition = send_to_partition = -1; // use of OpenMP threads // query OpenMP for number of threads/process set by user at run-time @@ -126,14 +128,70 @@ Comm::~Comm() memory->destroy(buf_recv); } +/* ---------------------------------------------------------------------- + set dimensions of processor grid + invoked from input script by processors command +------------------------------------------------------------------------- */ + +void Comm::set_processors(int narg, char **arg) +{ + if (narg < 3) error->all(FLERR,"Illegal processors command"); + + if (strcmp(arg[0],"*") == 0) user_procgrid[0] = 0; + else user_procgrid[0] = atoi(arg[0]); + if (strcmp(arg[1],"*") == 0) user_procgrid[1] = 0; + else user_procgrid[1] = atoi(arg[1]); + if (strcmp(arg[2],"*") == 0) user_procgrid[2] = 0; + else user_procgrid[2] = atoi(arg[2]); + + if (user_procgrid[0] < 0 || user_procgrid[1] < 0 || user_procgrid[2] < 0) + error->all(FLERR,"Illegal processors command"); + + int iarg = 3; + while (iarg < narg) { + if (strcmp(arg[iarg],"part") == 0) { + if (iarg+4 > narg) error->all(FLERR,"Illegal processors command"); + if (universe->nworlds == 1) + error->all(FLERR, + "Cannot use processors part command " + "without using partitions"); + int isend = atoi(arg[iarg+1]); + int irecv = atoi(arg[iarg+2]); + if (isend < 1 || isend > universe->nworlds || + irecv < 1 || irecv > universe->nworlds || isend == irecv) + error->all(FLERR,"Invalid partition in processors part command"); + if (isend-1 == universe->iworld) send_to_partition = irecv-1; + if (irecv-1 == universe->iworld) recv_from_partition = isend-1; + if (strcmp(arg[iarg+3],"multiple") == 0) { + if (universe->iworld == irecv-1) other_partition_style = 0; + } else error->all(FLERR,"Illegal processors command"); + iarg += 4; + } else error->all(FLERR,"Illegal processors command"); + } +} + /* ---------------------------------------------------------------------- setup 3d grid of procs based on box size ------------------------------------------------------------------------- */ -void Comm::set_procs() +void Comm::set_proc_grid() { + // recv proc layout of another partition if my layout depends on it + + if (recv_from_partition >= 0) { + MPI_Status status; + if (me == 0) MPI_Recv(other_procgrid,3,MPI_INT, + universe->root_proc[recv_from_partition],0, + universe->uworld,&status); + MPI_Bcast(other_procgrid,3,MPI_INT,0,world); + } + + // create layout of procs mapped to simulation box + procs2box(); + // error check + if (procgrid[0]*procgrid[1]*procgrid[2] != nprocs) error->all(FLERR,"Bad grid of processors"); if (domain->dimension == 2 && procgrid[2] != 1) @@ -179,6 +237,14 @@ void Comm::set_procs() if (logfile) fprintf(logfile," %d by %d by %d MPI processor grid\n", procgrid[0],procgrid[1],procgrid[2]); } + + // send my proc layout to another partition if requested + + if (send_to_partition >= 0) { + if (me == 0) MPI_Send(procgrid,3,MPI_INT, + universe->root_proc[send_to_partition],0, + universe->uworld); + } } /* ---------------------------------------------------------------------- */ @@ -1149,8 +1215,12 @@ void Comm::procs2box() double bestsurf = 2.0 * (area[0]+area[1]+area[2]); // loop thru all possible factorizations of nprocs - // only consider valid cases that match procgrid settings + // choose factorization with least surface area // surf = surface area of a proc sub-domain + // only consider factorizations that match procgrid & other_procgrid settings + // if set, only consider factorizations that match other_procgrid settings + + procgrid[0] = procgrid[1] = procgrid[2] = 0; int ipx,ipy,ipz,valid; double surf; @@ -1159,6 +1229,9 @@ void Comm::procs2box() while (ipx <= nprocs) { valid = 1; if (user_procgrid[0] && ipx != user_procgrid[0]) valid = 0; + if (other_procgrid[0]) { + if (other_partition_style == 0 && other_procgrid[0] % ipx) valid = 0; + } if (nprocs % ipx) valid = 0; if (!valid) { ipx++; @@ -1169,6 +1242,9 @@ void Comm::procs2box() while (ipy <= nprocs/ipx) { valid = 1; if (user_procgrid[1] && ipy != user_procgrid[1]) valid = 0; + if (other_procgrid[1]) { + if (other_partition_style == 0 && other_procgrid[1] % ipy) valid = 0; + } if ((nprocs/ipx) % ipy) valid = 0; if (!valid) { ipy++; @@ -1178,6 +1254,9 @@ void Comm::procs2box() ipz = nprocs/ipx/ipy; valid = 1; if (user_procgrid[2] && ipz != user_procgrid[2]) valid = 0; + if (other_procgrid[2]) { + if (other_partition_style == 0 && other_procgrid[2] % ipz) valid = 0; + } if (domain->dimension == 2 && ipz != 1) valid = 0; if (!valid) { ipy++; @@ -1196,6 +1275,8 @@ void Comm::procs2box() ipx++; } + + if (procgrid[0] == 0) error->one(FLERR,"Could not layout grid of processors"); } /* ---------------------------------------------------------------------- diff --git a/src/comm.h b/src/comm.h index 049e1ae85fa4508b1a3da6da0b0ade715c2806d0..3dd3d9a5627fbec4093857ec3f3981122619a030 100644 --- a/src/comm.h +++ b/src/comm.h @@ -29,18 +29,24 @@ class Comm : protected Pointers { double cutghost[3]; // cutoffs used for acquiring ghost atoms double cutghostuser; // user-specified ghost cutoff int ***grid2proc; // which proc owns i,j,k loc in 3d grid + int recv_from_partition; // recv proc layout from this partition + int send_to_partition; // send my proc layout to this partition + // -1 if no recv or send + int other_partition_style; // 0 = recv layout dims must be multiple of + // my layout dims int nthreads; // OpenMP threads per MPI process Comm(class LAMMPS *); virtual ~Comm(); virtual void init(); - virtual void set_procs(); // setup 3d grid of procs - virtual void setup(); // setup 3d communication pattern - virtual void forward_comm(int dummy = 0); // forward communication of atom coords - virtual void reverse_comm(); // reverse communication of forces - virtual void exchange(); // move atoms to new procs - virtual void borders(); // setup list of atoms to communicate + void set_processors(int, char **); // set procs from input script + virtual void set_proc_grid(); // setup 3d grid of procs + virtual void setup(); // setup 3d comm pattern + virtual void forward_comm(int dummy = 0); // forward comm of atom coords + virtual void reverse_comm(); // reverse comm of forces + virtual void exchange(); // move atoms to new procs + virtual void borders(); // setup list of atoms to comm virtual void forward_comm_pair(class Pair *); // forward comm from a Pair virtual void reverse_comm_pair(class Pair *); // reverse comm from a Pair @@ -77,6 +83,8 @@ class Comm : protected Pointers { int map_style; // non-0 if global->local mapping is done int bordergroup; // only communicate this group in borders + int other_procgrid[3]; // proc layout + int *firstrecv; // where to put 1st recv atom in each swap int **sendlist; // list of atoms to send in each swap int *maxsendlist; // max size of send list for each swap diff --git a/src/create_box.cpp b/src/create_box.cpp index 7cdb03365775c76eb16661362b6d2166f038e340..be437d0729f459014aac3ac960111df71a8d6985 100644 --- a/src/create_box.cpp +++ b/src/create_box.cpp @@ -110,6 +110,6 @@ void CreateBox::command(int narg, char **arg) domain->print_box("Created "); domain->set_initial_box(); domain->set_global_box(); - comm->set_procs(); + comm->set_proc_grid(); domain->set_local_box(); } diff --git a/src/domain.cpp b/src/domain.cpp index 3140ea6a5487fc97799a24b7d49e63bd7dec0971..54f1f528b1df9f973ac034334590ddbbb5a3ae2a 100644 --- a/src/domain.cpp +++ b/src/domain.cpp @@ -603,7 +603,9 @@ void Domain::minimum_image(double *delta) for triclinic, also add/subtract tilt factors in other dims as needed ------------------------------------------------------------------------- */ -void Domain::closest_image(const double * const xi, const double * const xj, double * const xjimage) +void Domain::closest_image(const double * const xi, + const double * const xj, + double * const xjimage) { double dx,dy,dz; diff --git a/src/force.cpp b/src/force.cpp index 2600b7dbf7755cf8399f8c5ce00c6a49109a9143..c195b39d7baa3dc59bcbbefd5d8ca2785d6961da 100644 --- a/src/force.cpp +++ b/src/force.cpp @@ -534,6 +534,20 @@ KSpace *Force::new_kspace(int narg, char **arg, const char *suffix, int &sflag) return NULL; } +/* ---------------------------------------------------------------------- + return ptr to Kspace class if matches word + if exact, then style name must be exact match to word + if not exact, style name must contain word + return NULL if no match +------------------------------------------------------------------------- */ + +KSpace *Force::kspace_match(const char *word, int exact) +{ + if (exact && strcmp(kspace_style,word) == 0) return kspace; + else if (!exact && strstr(kspace_style,word)) return kspace; + return NULL; +} + /* ---------------------------------------------------------------------- set special bond values ------------------------------------------------------------------------- */ @@ -627,7 +641,7 @@ void Force::set_special(int narg, char **arg) /* ---------------------------------------------------------------------- compute bounds implied by numeric str with a possible wildcard asterik - nmax = upper bound + 1 = lower bound, nmax = upper bound 5 possibilities: (1) i = i to i, (2) * = 1 to nmax, (3) i* = i to nmax, (4) *j = 1 to j, (5) i*j = i to j @@ -639,21 +653,22 @@ void Force::bounds(char *str, int nmax, int &nlo, int &nhi) char *ptr = strchr(str,'*'); if (ptr == NULL) { - nlo = MAX(atoi(str),1); - nhi = MIN(atoi(str),nmax); + nlo = nhi = atoi(str); } else if (strlen(str) == 1) { nlo = 1; nhi = nmax; } else if (ptr == str) { nlo = 1; - nhi = MIN(atoi(ptr+1),nmax); + nhi = atoi(ptr+1); } else if (strlen(ptr+1) == 0) { - nlo = MAX(atoi(str),1); + nlo = atoi(str); nhi = nmax; } else { - nlo = MAX(atoi(str),1); - nhi = MIN(atoi(ptr+1),nmax); + nlo = atoi(str); + nhi = atoi(ptr+1); } + + if (nlo < 1 || nhi > nmax) error->all(FLERR,"Numeric index is out of bounds"); } /* ---------------------------------------------------------------------- diff --git a/src/force.h b/src/force.h index a7089070d222af385a7ca0214bae3edec078cb15..bafa38acf93425da8c9f3ba4b525f77888cee5d0 100644 --- a/src/force.h +++ b/src/force.h @@ -88,6 +88,7 @@ class Force : protected Pointers { void create_kspace(int, char **, const char *suffix = NULL); class KSpace *new_kspace(int, char **, const char *, int &); + class KSpace *kspace_match(const char *, int); void set_special(int, char **); void bounds(char *, int, int &, int &); diff --git a/src/input.cpp b/src/input.cpp index 1345ef370e585ca5b74e891df58338a50b5cdd08..b0358004d8b735120921ce19795ff542d0a2311c 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -418,6 +418,7 @@ int Input::execute_command() else if (!strcmp(command,"label")) label(); else if (!strcmp(command,"log")) log(); else if (!strcmp(command,"next")) next_command(); + else if (!strcmp(command,"partition")) partition(); else if (!strcmp(command,"print")) print(); else if (!strcmp(command,"shell")) shell(); else if (!strcmp(command,"variable")) variable_command(); @@ -580,7 +581,7 @@ void Input::ifthenelse() ncommands++; } - for (int i = 0; i < ncommands; i++) input->one(commands[i]); + for (int i = 0; i < ncommands; i++) one(commands[i]); for (int i = 0; i < ncommands; i++) delete [] commands[i]; delete [] commands; @@ -636,7 +637,7 @@ void Input::ifthenelse() // execute the list of commands - for (int i = 0; i < ncommands; i++) input->one(commands[i]); + for (int i = 0; i < ncommands; i++) one(commands[i]); // clean up @@ -742,6 +743,40 @@ void Input::next_command() /* ---------------------------------------------------------------------- */ +void Input::partition() +{ + if (narg < 3) error->all(FLERR,"Illegal partition command"); + + int yesflag; + if (strcmp(arg[0],"yes") == 0) yesflag = 1; + else if (strcmp(arg[0],"no") == 0) yesflag = 0; + else error->all(FLERR,"Illegal partition command"); + + int ilo,ihi; + force->bounds(arg[1],universe->nworlds,ilo,ihi); + + // copy original line to copy, since will use strtok() on it + // ptr = start of 4th word + + strcpy(copy,line); + copy[strlen(copy)-1] = '\0'; + char *ptr = strtok(copy," \t\n\r\f"); + ptr = strtok(NULL," \t\n\r\f"); + ptr = strtok(NULL," \t\n\r\f"); + ptr += strlen(ptr) + 1; + ptr += strspn(ptr," \t\n\r\f"); + + // execute the remaining command line on requested partitions + + if (yesflag) { + if (universe->iworld+1 >= ilo && universe->iworld+1 <= ihi) one(ptr); + } else { + if (universe->iworld+1 < ilo || universe->iworld+1 > ihi) one(ptr); + } +} + +/* ---------------------------------------------------------------------- */ + void Input::print() { if (narg != 1) error->all(FLERR,"Illegal print command"); @@ -1204,19 +1239,9 @@ void Input::pair_write() void Input::processors() { - if (narg != 3) error->all(FLERR,"Illegal processors command"); if (domain->box_exist) error->all(FLERR,"Processors command after simulation box is defined"); - - if (strcmp(arg[0],"*") == 0) comm->user_procgrid[0] = 0; - else comm->user_procgrid[0] = atoi(arg[0]); - if (strcmp(arg[1],"*") == 0) comm->user_procgrid[1] = 0; - else comm->user_procgrid[1] = atoi(arg[1]); - if (strcmp(arg[2],"*") == 0) comm->user_procgrid[2] = 0; - else comm->user_procgrid[2] = atoi(arg[2]); - - if (comm->user_procgrid[0] < 0 || comm->user_procgrid[1] < 0 || - comm->user_procgrid[2] < 0) error->all(FLERR,"Illegal processors command"); + comm->set_processors(narg,arg); } /* ---------------------------------------------------------------------- */ diff --git a/src/input.h b/src/input.h index 2e3259b9fe28cd9ac284b3b067dd6d7a4931df3e..fd08a6098268e02c07ae4e757d511d1a52e5130f 100644 --- a/src/input.h +++ b/src/input.h @@ -57,6 +57,7 @@ class Input : protected Pointers { void label(); void log(); void next_command(); + void partition(); void print(); void shell(); void variable_command(); diff --git a/src/read_data.cpp b/src/read_data.cpp index a96a566b017486783fce76428be037818d17b30e..872076b112fa5e706773d82025eec9e8e1d25a1c 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -137,7 +137,7 @@ void ReadData::command(int narg, char **arg) domain->print_box(" "); domain->set_initial_box(); domain->set_global_box(); - comm->set_procs(); + comm->set_proc_grid(); domain->set_local_box(); // customize for new sections diff --git a/src/read_restart.cpp b/src/read_restart.cpp index dadc29b64fd2c0ec5585062f352e86671a20203f..02e313a8d08474bbc8c5c3a7f9898730ab6ebc60 100644 --- a/src/read_restart.cpp +++ b/src/read_restart.cpp @@ -135,7 +135,7 @@ void ReadRestart::command(int narg, char **arg) domain->print_box(" "); domain->set_initial_box(); domain->set_global_box(); - comm->set_procs(); + comm->set_proc_grid(); domain->set_local_box(); // read groups, ntype-length arrays, force field, fix info from file diff --git a/src/replicate.cpp b/src/replicate.cpp index da6a662069c94900b428a6418c6022490f0e16c1..ce53825f319c186ede357aac4159908614cc3102 100644 --- a/src/replicate.cpp +++ b/src/replicate.cpp @@ -193,7 +193,7 @@ void Replicate::command(int narg, char **arg) domain->print_box(" "); domain->set_initial_box(); domain->set_global_box(); - comm->set_procs(); + comm->set_proc_grid(); domain->set_local_box(); // copy type arrays to new atom class diff --git a/src/verlet_split.cpp b/src/verlet_split.cpp new file mode 100644 index 0000000000000000000000000000000000000000..39d7185800682bf3350642622fcd40156e146067 --- /dev/null +++ b/src/verlet_split.cpp @@ -0,0 +1,480 @@ +/* ------------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Yuxing Peng and Chris Knight (U Chicago) +------------------------------------------------------------------------- */ + +#include "string.h" +#include "verlet_split.h" +#include "universe.h" +#include "neighbor.h" +#include "domain.h" +#include "comm.h" +#include "atom.h" +#include "atom_vec.h" +#include "force.h" +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "dihedral.h" +#include "improper.h" +#include "kspace.h" +#include "output.h" +#include "update.h" +#include "modify.h" +#include "timer.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +VerletSplit::VerletSplit(LAMMPS *lmp, int narg, char **arg) : + Verlet(lmp, narg, arg) +{ + // error checks on partitions + + if (universe->nworlds != 2) + error->universe_all(FLERR,"Verlet/split requires 2 partitions"); + if (universe->procs_per_world[0] % universe->procs_per_world[1]) + error->universe_all(FLERR,"Verlet/split requires Rspace partition " + "size be multiple of Kspace partition size"); + + // master = 1 for Rspace procs, 0 for Kspace procs + + if (universe->iworld == 0) master = 1; + else master = 0; + + ratio = universe->procs_per_world[0] / universe->procs_per_world[1]; + + // Kspace root proc broadcasts info about Kspace proc layout to Rspace procs + + int kspace_procgrid[3]; + + if (universe->me == universe->root_proc[1]) { + kspace_procgrid[0] = comm->procgrid[0]; + kspace_procgrid[1] = comm->procgrid[1]; + kspace_procgrid[2] = comm->procgrid[2]; + } + MPI_Bcast(kspace_procgrid,3,MPI_INT,universe->root_proc[1],universe->uworld); + + int ***kspace_grid2proc; + memory->create(kspace_grid2proc,kspace_procgrid[0], + kspace_procgrid[1],kspace_procgrid[2], + "verlet/split:kspace_grid2proc"); + + if (universe->me == universe->root_proc[1]) { + for (int i = 0; i < comm->procgrid[0]; i++) + for (int j = 0; j < comm->procgrid[1]; j++) + for (int k = 0; k < comm->procgrid[2]; k++) + kspace_grid2proc[i][j][k] = comm->grid2proc[i][j][k]; + } + MPI_Bcast(&kspace_grid2proc[0][0][0], + kspace_procgrid[0]*kspace_procgrid[1]*kspace_procgrid[2],MPI_INT, + universe->root_proc[1],universe->uworld); + + // Rspace partition must be multiple of Kspace partition in each dim + // so atoms of one Kspace proc coincide with atoms of several Rspace procs + + if (master) { + int flag = 0; + if (comm->procgrid[0] % kspace_procgrid[0]) flag = 1; + if (comm->procgrid[1] % kspace_procgrid[1]) flag = 1; + if (comm->procgrid[2] % kspace_procgrid[2]) flag = 1; + if (flag) + error->one(FLERR, + "Verlet/split requires Rspace partition layout be " + "multiple of Kspace partition layout in each dim"); + } + + // block = 1 Kspace proc with set of Rspace procs it overlays + // me_block = 0 for Kspace proc + // me_block = 1 to ratio for Rspace procs + // block = MPI communicator for that set of procs + + int iblock,key; + + if (!master) { + iblock = comm->me; + key = 0; + } else { + int kpx = comm->myloc[0] / (comm->procgrid[0]/kspace_procgrid[0]); + int kpy = comm->myloc[1] / (comm->procgrid[1]/kspace_procgrid[1]); + int kpz = comm->myloc[2] / (comm->procgrid[2]/kspace_procgrid[2]); + iblock = kspace_grid2proc[kpx][kpy][kpz]; + key = 1; + } + + MPI_Comm_split(universe->uworld,iblock,key,&block); + MPI_Comm_rank(block,&me_block); + + // output block groupings to universe screen/logfile + // bmap is ordered by block and then by proc within block + + int *bmap = new int[universe->nprocs]; + for (int i = 0; i < universe->nprocs; i++) bmap[i] = -1; + bmap[iblock*(ratio+1)+me_block] = universe->me; + + int *bmapall = new int[universe->nprocs]; + MPI_Allreduce(bmap,bmapall,universe->nprocs,MPI_INT,MPI_MAX,universe->uworld); + + if (universe->me == 0) { + if (universe->uscreen) { + fprintf(universe->uscreen,"Rspace/Kspace procs in each block:\n"); + int m = 0; + for (int i = 0; i < universe->nprocs/(ratio+1); i++) { + fprintf(universe->uscreen," block %d:",i); + int kspace_proc = bmapall[m++]; + for (int j = 1; j <= ratio; j++) + fprintf(universe->uscreen," %d",bmapall[m++]); + fprintf(universe->uscreen," %d\n",kspace_proc); + } + } + if (universe->ulogfile) { + fprintf(universe->ulogfile,"Rspace/Kspace procs in each block:\n"); + int m = 0; + for (int i = 0; i < universe->nprocs/(ratio+1); i++) { + fprintf(universe->ulogfile," block %d:",i); + int kspace_proc = bmapall[m++]; + for (int j = 1; j <= ratio; j++) + fprintf(universe->ulogfile," %d",bmapall[m++]); + fprintf(universe->ulogfile," %d\n",kspace_proc); + } + } + } + + memory->destroy(kspace_grid2proc); + delete [] bmap; + delete [] bmapall; + + // size/disp = vectors for MPI gather/scatter within block + + qsize = new int[ratio+1]; + qdisp = new int[ratio+1]; + xsize = new int[ratio+1]; + xdisp = new int[ratio+1]; + + // f_kspace = Rspace copy of Kspace forces + // allocate dummy version for Kspace partition + + maxatom = 0; + f_kspace = NULL; + if (!master) memory->create(f_kspace,1,1,"verlet/split:f_kspace"); +} + +/* ---------------------------------------------------------------------- */ + +VerletSplit::~VerletSplit() +{ + delete [] qsize; + delete [] qdisp; + delete [] xsize; + delete [] xdisp; + memory->destroy(f_kspace); + MPI_Comm_free(&block); +} + +/* ---------------------------------------------------------------------- + initialization before run +------------------------------------------------------------------------- */ + +void VerletSplit::init() +{ + if (!force->kspace && comm->me == 0) + error->warning(FLERR,"No Kspace calculation with verlet/split"); + + if (force->kspace_match("tip4p",0)) tip4p_flag = 1; + else tip4p_flag = 0; + + Verlet::init(); +} + +/* ---------------------------------------------------------------------- + run for N steps + master partition does everything but Kspace + servant partition does just Kspace + communicate back and forth every step: + atom coords from master -> servant + kspace forces from servant -> master + also box bounds from master -> servant if necessary +------------------------------------------------------------------------- */ + +void VerletSplit::run(int n) +{ + int nflag,ntimestep,sortflag; + + // reset LOOP timer due to imbalance in setup() on 2 partitions + // setup initial Rspace <-> Kspace comm params + + MPI_Barrier(universe->uworld); + timer->array[TIME_LOOP] = MPI_Wtime(); + rk_setup(); + + // flags for timestepping iterations + + int n_post_integrate = modify->n_post_integrate; + int n_pre_exchange = modify->n_pre_exchange; + int n_pre_neighbor = modify->n_pre_neighbor; + int n_pre_force = modify->n_pre_force; + int n_post_force = modify->n_post_force; + int n_end_of_step = modify->n_end_of_step; + + if (atom->sortfreq > 0) sortflag = 1; + else sortflag = 0; + + for (int i = 0; i < n; i++) { + + ntimestep = ++update->ntimestep; + ev_set(ntimestep); + + // initial time integration + + if (master) { + modify->initial_integrate(vflag); + if (n_post_integrate) modify->post_integrate(); + } + + // regular communication vs neighbor list rebuild + + if (master) nflag = neighbor->decide(); + MPI_Bcast(&nflag,1,MPI_INT,1,block); + + if (master) { + if (nflag == 0) { + timer->stamp(); + comm->forward_comm(); + timer->stamp(TIME_COMM); + } else { + if (n_pre_exchange) modify->pre_exchange(); + if (triclinic) domain->x2lamda(atom->nlocal); + domain->pbc(); + if (domain->box_change) { + domain->reset_box(); + comm->setup(); + if (neighbor->style) neighbor->setup_bins(); + } + timer->stamp(); + comm->exchange(); + if (sortflag && ntimestep >= atom->nextsort) atom->sort(); + comm->borders(); + if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); + timer->stamp(TIME_COMM); + if (n_pre_neighbor) modify->pre_neighbor(); + neighbor->build(); + timer->stamp(TIME_NEIGHBOR); + } + } + + // if reneighboring occurred, re-setup Rspace <-> Kspace comm params + // comm Rspace atom coords to Kspace procs + + if (nflag) rk_setup(); + r2k_comm(); + + // force computations + + force_clear(); + + if (master) { + if (n_pre_force) modify->pre_force(vflag); + + timer->stamp(); + if (force->pair) { + force->pair->compute(eflag,vflag); + timer->stamp(TIME_PAIR); + } + + if (atom->molecular) { + if (force->bond) force->bond->compute(eflag,vflag); + if (force->angle) force->angle->compute(eflag,vflag); + if (force->dihedral) force->dihedral->compute(eflag,vflag); + if (force->improper) force->improper->compute(eflag,vflag); + timer->stamp(TIME_BOND); + } + + if (force->newton) { + comm->reverse_comm(); + timer->stamp(TIME_COMM); + } + + } else { + if (force->kspace) { + timer->stamp(); + force->kspace->compute(eflag,vflag); + timer->stamp(TIME_KSPACE); + } + } + + // comm and sum Kspace forces back to Rspace procs + + k2r_comm(); + + // force modifications, final time integration, diagnostics + // all output + + if (master) { + if (n_post_force) modify->post_force(vflag); + modify->final_integrate(); + if (n_end_of_step) modify->end_of_step(); + + if (ntimestep == output->next) { + timer->stamp(); + output->write(ntimestep); + timer->stamp(TIME_OUTPUT); + } + } + } +} + +/* ---------------------------------------------------------------------- + setup params for Rspace <-> Kspace communication + called initially and after every reneighbor + also communcicate atom charges from Rspace to KSpace since static +------------------------------------------------------------------------- */ + +void VerletSplit::rk_setup() +{ + // grow f_kspace array on master procs if necessary + + if (master) { + if (atom->nlocal > maxatom) { + memory->destroy(f_kspace); + maxatom = atom->nmax; + memory->create(f_kspace,maxatom,3,"verlet/split:f_kspace"); + } + } + + // qsize = # of atoms owned by each master proc in block + + int n = 0; + if (master) n = atom->nlocal; + MPI_Gather(&n,1,MPI_INT,qsize,1,MPI_INT,0,block); + + // setup qdisp, xsize, xdisp based on qsize + // only needed by Kspace proc + // set Kspace nlocal to sum of Rspace nlocals + // insure Kspace atom arrays are large enough + + if (!master) { + qsize[0] = qdisp[0] = xsize[0] = xdisp[0] = 0; + for (int i = 1; i <= ratio; i++) { + qdisp[i] = qdisp[i-1]+qsize[i-1]; + xsize[i] = 3*qsize[i]; + xdisp[i] = xdisp[i-1]+xsize[i-1]; + } + + atom->nlocal = qdisp[ratio] + qsize[ratio]; + while (atom->nmax <= atom->nlocal) atom->avec->grow(0); + atom->nghost = 0; + } + + // one-time gather of Rspace atom charges to Kspace proc + + MPI_Gatherv(atom->q,n,MPI_DOUBLE,atom->q,qsize,qdisp,MPI_DOUBLE,0,block); + + // for TIP4P also need to send type and tag + // KSpace proc needs to acquire ghost atoms and map all its atoms + + if (tip4p_flag) { + MPI_Gatherv(atom->type,n,MPI_INT,atom->type,qsize,qdisp,MPI_INT,0,block); + MPI_Gatherv(atom->tag,n,MPI_INT,atom->tag,qsize,qdisp,MPI_INT,0,block); + if (!master) { + if (triclinic) domain->x2lamda(atom->nlocal); + if (domain->box_change) comm->setup(); + timer->stamp(); + atom->map_clear(); // in lieu of comm->exchange() + comm->borders(); + if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); + timer->stamp(TIME_COMM); + } + } +} + +/* ---------------------------------------------------------------------- + communicate Rspace atom coords to Kspace + also eflag,vflag and box bounds if needed +------------------------------------------------------------------------- */ + +void VerletSplit::r2k_comm() +{ + MPI_Status status; + + int n = 0; + if (master) n = atom->nlocal; + MPI_Gatherv(atom->x[0],n*3,MPI_DOUBLE,atom->x[0],xsize,xdisp, + MPI_DOUBLE,0,block); + + // send eflag,vflag from Rspace to Kspace + + if (me_block == 1) { + int flags[2]; + flags[0] = eflag; flags[1] = vflag; + MPI_Send(flags,2,MPI_INT,0,0,block); + } else if (!master) { + int flags[2]; + MPI_Recv(flags,2,MPI_DOUBLE,1,0,block,&status); + eflag = flags[0]; vflag = flags[1]; + } + + // send box bounds from Rspace to Kspace if simulation box is dynamic + + if (domain->box_change) { + if (me_block == 1) { + MPI_Send(domain->boxlo,3,MPI_DOUBLE,0,0,block); + MPI_Send(domain->boxhi,3,MPI_DOUBLE,0,0,block); + } else if (!master) { + MPI_Recv(domain->boxlo,3,MPI_DOUBLE,1,0,block,&status); + MPI_Recv(domain->boxhi,3,MPI_DOUBLE,1,0,block,&status); + domain->set_global_box(); + domain->set_local_box(); + force->kspace->setup(); + } + } +} + +/* ---------------------------------------------------------------------- + communicate and sum Kspace atom forces back to Rspace +------------------------------------------------------------------------- */ + +void VerletSplit::k2r_comm() +{ + if (eflag) MPI_Bcast(&force->kspace->energy,1,MPI_DOUBLE,0,block); + if (vflag) MPI_Bcast(force->kspace->virial,6,MPI_DOUBLE,0,block); + + int n = 0; + if (master) n = atom->nlocal; + MPI_Scatterv(atom->f[0],xsize,xdisp,MPI_DOUBLE, + f_kspace[0],n*3,MPI_DOUBLE,0,block); + + if (master) { + double **f = atom->f; + int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) { + f[i][0] += f_kspace[i][0]; + f[i][1] += f_kspace[i][1]; + f[i][2] += f_kspace[i][2]; + } + } +} + +/* ---------------------------------------------------------------------- + memory usage of Kspace force array on master procs +------------------------------------------------------------------------- */ + +bigint VerletSplit::memory_usage() +{ + bigint bytes = maxatom*3 * sizeof(double); + return bytes; +} diff --git a/src/verlet_split.h b/src/verlet_split.h new file mode 100644 index 0000000000000000000000000000000000000000..080a6057eb1fcba7ccf352a6cc3f73d48f3a66a3 --- /dev/null +++ b/src/verlet_split.h @@ -0,0 +1,54 @@ +/* ------------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef INTEGRATE_CLASS + +IntegrateStyle(verlet/split,VerletSplit) + +#else + +#ifndef LMP_VERLET_SPLIT_H +#define LMP_VERLET_SPLIT_H + +#include "verlet.h" + +namespace LAMMPS_NS { + +class VerletSplit : public Verlet { + public: + VerletSplit(class LAMMPS *, int, char **); + ~VerletSplit(); + void init(); + void run(int); + bigint memory_usage(); + + private: + int master; // 1 if an Rspace proc, 0 if Kspace + int me_block; // proc ID within Rspace/Kspace block + int ratio; // ratio of Rspace procs to Kspace procs + int *qsize,*qdisp,*xsize,*xdisp; // MPI gather/scatter params for block comm + MPI_Comm block; // communicator within one block + int tip4p_flag; // 1 if PPPM/tip4p so do extra comm + + double **f_kspace; // copy of Kspace forces on Rspace procs + int maxatom; + + void rk_setup(); + void r2k_comm(); + void k2r_comm(); +}; + +} + +#endif +#endif