diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp
index 4e810bbc2378c5bc37f9f5b59b09c675e6193cf1..34b17533ea66823d9c8cc0297e81b3abf55743bd 100644
--- a/src/KSPACE/pppm.cpp
+++ b/src/KSPACE/pppm.cpp
@@ -115,7 +115,8 @@ void PPPM::init()
 
   if (domain->triclinic)
     error->all(FLERR,"Cannot (yet) use PPPM with triclinic box");
-  if (domain->dimension == 2) error->all(FLERR,"Cannot use PPPM with 2d simulation");
+  if (domain->dimension == 2) error->all(FLERR,
+					 "Cannot use PPPM with 2d simulation");
 
   if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
 
@@ -186,8 +187,8 @@ void PPPM::init()
 
   // if we have a /proxy pppm version check if the pair style is compatible
 
-  if ( (strcmp(force->kspace_style,"pppm/proxy") == 0) ||
-       (strcmp(force->kspace_style,"pppm/tip4p/proxy") == 0) ) {
+  if ((strcmp(force->kspace_style,"pppm/proxy") == 0) ||
+      (strcmp(force->kspace_style,"pppm/tip4p/proxy") == 0) ) {
     if (force->pair == NULL)
       error->all(FLERR,"KSpace style is incompatible with Pair style");
     if (strstr(force->pair_style,"pppm/") == NULL )
@@ -1507,7 +1508,8 @@ void PPPM::particle_map()
 
     if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
 	ny+nlower < nylo_out || ny+nupper > nyhi_out ||
-	nz+nlower < nzlo_out || nz+nupper > nzhi_out) flag = 1;
+	nz+nlower < nzlo_out || nz+nupper > nzhi_out)
+      flag = 1;
   }
 
   if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
@@ -1527,7 +1529,8 @@ void PPPM::make_rho()
 
   // clear 3d density array
 
-  memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR));
+  memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
+	 ngrid*sizeof(FFT_SCALAR));
 
   // loop over my charges, add their contribution to nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
@@ -1784,6 +1787,7 @@ void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
    charge assignment into rho1d
    dx,dy,dz = distance of particle from "lower left" grid point 
 ------------------------------------------------------------------------- */
+
 void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
 			 const FFT_SCALAR &dz)
 {
diff --git a/src/KSPACE/pppm.h b/src/KSPACE/pppm.h
index 1ee2cfbb4f3e408fddde72d63ac4ca19800efbe5..b3dbf1779c75a5cc3a56edeca021eb31eaa6b70c 100644
--- a/src/KSPACE/pppm.h
+++ b/src/KSPACE/pppm.h
@@ -1,4 +1,4 @@
-/* -*- c++ -*- ----------------------------------------------------------
+/* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
diff --git a/src/PERI/pair_peri_lps.cpp b/src/PERI/pair_peri_lps.cpp
index ab747109493b430e1cef082fdcc62ba041097ea5..10002f8d45735ffb229f92fc1002b2806862434a 100644
--- a/src/PERI/pair_peri_lps.cpp
+++ b/src/PERI/pair_peri_lps.cpp
@@ -41,7 +41,7 @@ using namespace LAMMPS_NS;
 PairPeriLPS::PairPeriLPS(LAMMPS *lmp) : Pair(lmp)			
 {
   for (int i = 0; i < 6; i++) virial[i] = 0.0;
-  no_virial_fdotr_compute=1;
+  no_virial_fdotr_compute = 1;
 
   ifix_peri = -1;
 
@@ -189,7 +189,8 @@ void PairPeriLPS::compute(int eflag, int vflag)
         }
 
         if (eflag) evdwl = 0.5*rk*dr;
-	if (evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,fpair*vfrac[i],delx,dely,delz);
+	if (evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,
+			     fpair*vfrac[i],delx,dely,delz);
       }
     }
   }
@@ -303,7 +304,8 @@ void PairPeriLPS::compute(int eflag, int vflag)
       if (eflag) evdwl = 0.5 * 15 * (shearmodulus[itype][itype]/wvolume[i]) * 
 		   omega_plus*(deviatoric_extension * deviatoric_extension) *
 		   vfrac[j] * vfrac_scale;
-      if (evflag) ev_tally(i,i,nlocal,0,0.5*evdwl,0.0,0.5*fbond*vfrac[i],delx,dely,delz);
+      if (evflag) ev_tally(i,i,nlocal,0,0.5*evdwl,0.0,
+			   0.5*fbond*vfrac[i],delx,dely,delz);
 
       // find stretch in bond I-J and break if necessary
       // use s0 from previous timestep
@@ -316,7 +318,8 @@ void PairPeriLPS::compute(int eflag, int vflag)
       if (first)
          s0_new[i] = s00[itype][jtype] - (alpha[itype][jtype] * stretch);
       else
-         s0_new[i] = MAX(s0_new[i],s00[itype][jtype] - (alpha[itype][jtype] * stretch));
+         s0_new[i] = MAX(s0_new[i],s00[itype][jtype] - 
+			 (alpha[itype][jtype] * stretch));
 
       first = false;
     }
diff --git a/src/comm.cpp b/src/comm.cpp
index 1651e0dfd7eba67f5d5824b5c9e576172ff3861d..8309382b78d6032d5aa998ad730a4c93fd515ba2 100644
--- a/src/comm.cpp
+++ b/src/comm.cpp
@@ -22,6 +22,7 @@
 #include "stdio.h"
 #include "stdlib.h"
 #include "comm.h"
+#include "universe.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "force.h"
@@ -68,6 +69,7 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
   cutghostmulti = NULL;
   cutghostuser = 0.0;
   ghost_velocity = 0;
+  recv_from_partition = send_to_partition = -1;
 
   // use of OpenMP threads
   // query OpenMP for number of threads/process set by user at run-time
@@ -126,14 +128,70 @@ Comm::~Comm()
   memory->destroy(buf_recv);
 }
 
+/* ----------------------------------------------------------------------
+   set dimensions of processor grid
+   invoked from input script by processors command
+------------------------------------------------------------------------- */
+
+void Comm::set_processors(int narg, char **arg)
+{
+  if (narg < 3) error->all(FLERR,"Illegal processors command");
+
+  if (strcmp(arg[0],"*") == 0) user_procgrid[0] = 0;
+  else user_procgrid[0] = atoi(arg[0]);
+  if (strcmp(arg[1],"*") == 0) user_procgrid[1] = 0;
+  else user_procgrid[1] = atoi(arg[1]);
+  if (strcmp(arg[2],"*") == 0) user_procgrid[2] = 0;
+  else user_procgrid[2] = atoi(arg[2]);
+
+  if (user_procgrid[0] < 0 || user_procgrid[1] < 0 || user_procgrid[2] < 0) 
+    error->all(FLERR,"Illegal processors command");
+
+  int iarg = 3;
+  while (iarg < narg) {
+    if (strcmp(arg[iarg],"part") == 0) {
+      if (iarg+4 > narg) error->all(FLERR,"Illegal processors command");
+      if (universe->nworlds == 1)
+	error->all(FLERR,
+		   "Cannot use processors part command "
+		   "without using partitions");
+      int isend = atoi(arg[iarg+1]);
+      int irecv = atoi(arg[iarg+2]);
+      if (isend < 1 || isend > universe->nworlds ||
+	  irecv < 1 || irecv > universe->nworlds || isend == irecv)
+	error->all(FLERR,"Invalid partition in processors part command");
+      if (isend-1 == universe->iworld) send_to_partition = irecv-1;
+      if (irecv-1 == universe->iworld) recv_from_partition = isend-1;
+      if (strcmp(arg[iarg+3],"multiple") == 0) {
+	if (universe->iworld == irecv-1) other_partition_style = 0;
+      } else error->all(FLERR,"Illegal processors command");
+      iarg += 4;
+    } else error->all(FLERR,"Illegal processors command");
+  }
+}
+
 /* ----------------------------------------------------------------------
    setup 3d grid of procs based on box size
 ------------------------------------------------------------------------- */
 
-void Comm::set_procs()
+void Comm::set_proc_grid()
 {
+  // recv proc layout of another partition if my layout depends on it
+
+  if (recv_from_partition >= 0) {
+    MPI_Status status;
+    if (me == 0) MPI_Recv(other_procgrid,3,MPI_INT,
+			  universe->root_proc[recv_from_partition],0,
+			  universe->uworld,&status);
+    MPI_Bcast(other_procgrid,3,MPI_INT,0,world);
+  }
+
+  // create layout of procs mapped to simulation box
+
   procs2box();
 
+  // error check
+
   if (procgrid[0]*procgrid[1]*procgrid[2] != nprocs)
     error->all(FLERR,"Bad grid of processors");
   if (domain->dimension == 2 && procgrid[2] != 1)
@@ -179,6 +237,14 @@ void Comm::set_procs()
     if (logfile) fprintf(logfile,"  %d by %d by %d MPI processor grid\n",
 			 procgrid[0],procgrid[1],procgrid[2]);
   }
+
+  // send my proc layout to another partition if requested
+
+  if (send_to_partition >= 0) {
+    if (me == 0) MPI_Send(procgrid,3,MPI_INT,
+			  universe->root_proc[send_to_partition],0,
+			  universe->uworld);
+  }
 }
 
 /* ---------------------------------------------------------------------- */
@@ -1149,8 +1215,12 @@ void Comm::procs2box()
   double bestsurf = 2.0 * (area[0]+area[1]+area[2]);
 
   // loop thru all possible factorizations of nprocs
-  // only consider valid cases that match procgrid settings
+  // choose factorization with least surface area
   // surf = surface area of a proc sub-domain
+  // only consider factorizations that match procgrid & other_procgrid settings
+  // if set, only consider factorizations that match other_procgrid settings
+
+  procgrid[0] = procgrid[1] = procgrid[2] = 0;
 
   int ipx,ipy,ipz,valid;
   double surf;
@@ -1159,6 +1229,9 @@ void Comm::procs2box()
   while (ipx <= nprocs) {
     valid = 1;
     if (user_procgrid[0] && ipx != user_procgrid[0]) valid = 0;
+    if (other_procgrid[0]) {
+      if (other_partition_style == 0 && other_procgrid[0] % ipx) valid = 0;
+    }
     if (nprocs % ipx) valid = 0;
     if (!valid) {
       ipx++;
@@ -1169,6 +1242,9 @@ void Comm::procs2box()
     while (ipy <= nprocs/ipx) {
       valid = 1;
       if (user_procgrid[1] && ipy != user_procgrid[1]) valid = 0;
+      if (other_procgrid[1]) {
+	if (other_partition_style == 0 && other_procgrid[1] % ipy) valid = 0;
+      }
       if ((nprocs/ipx) % ipy) valid = 0;
       if (!valid) {
 	ipy++;
@@ -1178,6 +1254,9 @@ void Comm::procs2box()
       ipz = nprocs/ipx/ipy;
       valid = 1;
       if (user_procgrid[2] && ipz != user_procgrid[2]) valid = 0;
+      if (other_procgrid[2]) {
+	if (other_partition_style == 0 && other_procgrid[2] % ipz) valid = 0;
+      }
       if (domain->dimension == 2 && ipz != 1) valid = 0;
       if (!valid) {
 	ipy++;
@@ -1196,6 +1275,8 @@ void Comm::procs2box()
 
     ipx++;
   }
+
+  if (procgrid[0] == 0) error->one(FLERR,"Could not layout grid of processors");
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/comm.h b/src/comm.h
index 049e1ae85fa4508b1a3da6da0b0ade715c2806d0..3dd3d9a5627fbec4093857ec3f3981122619a030 100644
--- a/src/comm.h
+++ b/src/comm.h
@@ -29,18 +29,24 @@ class Comm : protected Pointers {
   double cutghost[3];               // cutoffs used for acquiring ghost atoms
   double cutghostuser;              // user-specified ghost cutoff
   int ***grid2proc;                 // which proc owns i,j,k loc in 3d grid
+  int recv_from_partition;          // recv proc layout from this partition
+  int send_to_partition;            // send my proc layout to this partition
+                                    // -1 if no recv or send
+  int other_partition_style;        // 0 = recv layout dims must be multiple of
+                                    //     my layout dims
   int nthreads;                     // OpenMP threads per MPI process
 
   Comm(class LAMMPS *);
   virtual ~Comm();
 
   virtual void init();
-  virtual void set_procs();                 // setup 3d grid of procs
-  virtual void setup();                     // setup 3d communication pattern
-  virtual void forward_comm(int dummy = 0); // forward communication of atom coords
-  virtual void reverse_comm();              // reverse communication of forces
-  virtual void exchange();                  // move atoms to new procs
-  virtual void borders();                   // setup list of atoms to communicate
+  void set_processors(int, char **);          // set procs from input script
+  virtual void set_proc_grid();               // setup 3d grid of procs
+  virtual void setup();                       // setup 3d comm pattern
+  virtual void forward_comm(int dummy = 0);   // forward comm of atom coords
+  virtual void reverse_comm();                // reverse comm of forces
+  virtual void exchange();                    // move atoms to new procs
+  virtual void borders();                     // setup list of atoms to comm
 
   virtual void forward_comm_pair(class Pair *);    // forward comm from a Pair
   virtual void reverse_comm_pair(class Pair *);    // reverse comm from a Pair
@@ -77,6 +83,8 @@ class Comm : protected Pointers {
   int map_style;                    // non-0 if global->local mapping is done
   int bordergroup;                  // only communicate this group in borders
 
+  int other_procgrid[3];            // proc layout
+
   int *firstrecv;                   // where to put 1st recv atom in each swap
   int **sendlist;                   // list of atoms to send in each swap
   int *maxsendlist;                 // max size of send list for each swap
diff --git a/src/create_box.cpp b/src/create_box.cpp
index 7cdb03365775c76eb16661362b6d2166f038e340..be437d0729f459014aac3ac960111df71a8d6985 100644
--- a/src/create_box.cpp
+++ b/src/create_box.cpp
@@ -110,6 +110,6 @@ void CreateBox::command(int narg, char **arg)
   domain->print_box("Created ");
   domain->set_initial_box();
   domain->set_global_box();
-  comm->set_procs();
+  comm->set_proc_grid();
   domain->set_local_box();
 }
diff --git a/src/domain.cpp b/src/domain.cpp
index 3140ea6a5487fc97799a24b7d49e63bd7dec0971..54f1f528b1df9f973ac034334590ddbbb5a3ae2a 100644
--- a/src/domain.cpp
+++ b/src/domain.cpp
@@ -603,7 +603,9 @@ void Domain::minimum_image(double *delta)
    for triclinic, also add/subtract tilt factors in other dims as needed
 ------------------------------------------------------------------------- */
 
-void Domain::closest_image(const double * const xi, const double * const xj, double * const xjimage)
+void Domain::closest_image(const double * const xi, 
+			   const double * const xj,
+			   double * const xjimage)
 {
   double dx,dy,dz;
 
diff --git a/src/force.cpp b/src/force.cpp
index 2600b7dbf7755cf8399f8c5ce00c6a49109a9143..c195b39d7baa3dc59bcbbefd5d8ca2785d6961da 100644
--- a/src/force.cpp
+++ b/src/force.cpp
@@ -534,6 +534,20 @@ KSpace *Force::new_kspace(int narg, char **arg, const char *suffix, int &sflag)
   return NULL;
 }
 
+/* ----------------------------------------------------------------------
+   return ptr to Kspace class if matches word
+   if exact, then style name must be exact match to word
+   if not exact, style name must contain word
+   return NULL if no match
+------------------------------------------------------------------------- */
+
+KSpace *Force::kspace_match(const char *word, int exact)
+{
+  if (exact && strcmp(kspace_style,word) == 0) return kspace;
+  else if (!exact && strstr(kspace_style,word)) return kspace;
+  return NULL;
+}
+
 /* ----------------------------------------------------------------------
    set special bond values 
 ------------------------------------------------------------------------- */
@@ -627,7 +641,7 @@ void Force::set_special(int narg, char **arg)
 
 /* ----------------------------------------------------------------------
    compute bounds implied by numeric str with a possible wildcard asterik
-   nmax = upper bound
+   1 = lower bound, nmax = upper bound
    5 possibilities:
      (1) i = i to i, (2) * = 1 to nmax,
      (3) i* = i to nmax, (4) *j = 1 to j, (5) i*j = i to j
@@ -639,21 +653,22 @@ void Force::bounds(char *str, int nmax, int &nlo, int &nhi)
   char *ptr = strchr(str,'*');
 
   if (ptr == NULL) {
-    nlo = MAX(atoi(str),1);
-    nhi = MIN(atoi(str),nmax);
+    nlo = nhi = atoi(str);
   } else if (strlen(str) == 1) {
     nlo = 1;
     nhi = nmax;
   } else if (ptr == str) {
     nlo = 1;
-    nhi = MIN(atoi(ptr+1),nmax);
+    nhi = atoi(ptr+1);
   } else if (strlen(ptr+1) == 0) {
-    nlo = MAX(atoi(str),1);
+    nlo = atoi(str);
     nhi = nmax;
   } else {
-    nlo = MAX(atoi(str),1);
-    nhi = MIN(atoi(ptr+1),nmax);
+    nlo = atoi(str);
+    nhi = atoi(ptr+1);
   }
+
+  if (nlo < 1 || nhi > nmax) error->all(FLERR,"Numeric index is out of bounds");
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/force.h b/src/force.h
index a7089070d222af385a7ca0214bae3edec078cb15..bafa38acf93425da8c9f3ba4b525f77888cee5d0 100644
--- a/src/force.h
+++ b/src/force.h
@@ -88,6 +88,7 @@ class Force : protected Pointers {
 
   void create_kspace(int, char **, const char *suffix = NULL);
   class KSpace *new_kspace(int, char **, const char *, int &);
+  class KSpace *kspace_match(const char *, int);
 
   void set_special(int, char **);
   void bounds(char *, int, int &, int &);
diff --git a/src/input.cpp b/src/input.cpp
index 1345ef370e585ca5b74e891df58338a50b5cdd08..b0358004d8b735120921ce19795ff542d0a2311c 100644
--- a/src/input.cpp
+++ b/src/input.cpp
@@ -418,6 +418,7 @@ int Input::execute_command()
   else if (!strcmp(command,"label")) label();
   else if (!strcmp(command,"log")) log();
   else if (!strcmp(command,"next")) next_command();
+  else if (!strcmp(command,"partition")) partition();
   else if (!strcmp(command,"print")) print();
   else if (!strcmp(command,"shell")) shell();
   else if (!strcmp(command,"variable")) variable_command();
@@ -580,7 +581,7 @@ void Input::ifthenelse()
       ncommands++;
     }
     
-    for (int i = 0; i < ncommands; i++) input->one(commands[i]);
+    for (int i = 0; i < ncommands; i++) one(commands[i]);
     
     for (int i = 0; i < ncommands; i++) delete [] commands[i];
     delete [] commands;
@@ -636,7 +637,7 @@ void Input::ifthenelse()
     
     // execute the list of commands
     
-    for (int i = 0; i < ncommands; i++) input->one(commands[i]);
+    for (int i = 0; i < ncommands; i++) one(commands[i]);
     
     // clean up
 
@@ -742,6 +743,40 @@ void Input::next_command()
 
 /* ---------------------------------------------------------------------- */
 
+void Input::partition()
+{
+  if (narg < 3) error->all(FLERR,"Illegal partition command");
+
+  int yesflag;
+  if (strcmp(arg[0],"yes") == 0) yesflag = 1;
+  else if (strcmp(arg[0],"no") == 0) yesflag = 0;
+  else error->all(FLERR,"Illegal partition command");
+
+  int ilo,ihi;
+  force->bounds(arg[1],universe->nworlds,ilo,ihi);
+
+  // copy original line to copy, since will use strtok() on it
+  // ptr = start of 4th word
+
+  strcpy(copy,line);
+  copy[strlen(copy)-1] = '\0';
+  char *ptr = strtok(copy," \t\n\r\f");
+  ptr = strtok(NULL," \t\n\r\f");
+  ptr = strtok(NULL," \t\n\r\f");
+  ptr += strlen(ptr) + 1;
+  ptr += strspn(ptr," \t\n\r\f");
+
+  // execute the remaining command line on requested partitions
+
+  if (yesflag) {
+    if (universe->iworld+1 >= ilo && universe->iworld+1 <= ihi) one(ptr);
+  } else {
+    if (universe->iworld+1 < ilo || universe->iworld+1 > ihi) one(ptr);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
 void Input::print()
 {
   if (narg != 1) error->all(FLERR,"Illegal print command");
@@ -1204,19 +1239,9 @@ void Input::pair_write()
 
 void Input::processors()
 {
-  if (narg != 3) error->all(FLERR,"Illegal processors command");
   if (domain->box_exist)
     error->all(FLERR,"Processors command after simulation box is defined");
-
-  if (strcmp(arg[0],"*") == 0) comm->user_procgrid[0] = 0;
-  else comm->user_procgrid[0] = atoi(arg[0]);
-  if (strcmp(arg[1],"*") == 0) comm->user_procgrid[1] = 0;
-  else comm->user_procgrid[1] = atoi(arg[1]);
-  if (strcmp(arg[2],"*") == 0) comm->user_procgrid[2] = 0;
-  else comm->user_procgrid[2] = atoi(arg[2]);
-
-  if (comm->user_procgrid[0] < 0 || comm->user_procgrid[1] < 0 ||
-      comm->user_procgrid[2] < 0) error->all(FLERR,"Illegal processors command");
+  comm->set_processors(narg,arg);
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/input.h b/src/input.h
index 2e3259b9fe28cd9ac284b3b067dd6d7a4931df3e..fd08a6098268e02c07ae4e757d511d1a52e5130f 100644
--- a/src/input.h
+++ b/src/input.h
@@ -57,6 +57,7 @@ class Input : protected Pointers {
   void label();
   void log();
   void next_command();
+  void partition();
   void print();
   void shell();
   void variable_command();
diff --git a/src/read_data.cpp b/src/read_data.cpp
index a96a566b017486783fce76428be037818d17b30e..872076b112fa5e706773d82025eec9e8e1d25a1c 100644
--- a/src/read_data.cpp
+++ b/src/read_data.cpp
@@ -137,7 +137,7 @@ void ReadData::command(int narg, char **arg)
   domain->print_box("  ");
   domain->set_initial_box();
   domain->set_global_box();
-  comm->set_procs();
+  comm->set_proc_grid();
   domain->set_local_box();
 
   // customize for new sections
diff --git a/src/read_restart.cpp b/src/read_restart.cpp
index dadc29b64fd2c0ec5585062f352e86671a20203f..02e313a8d08474bbc8c5c3a7f9898730ab6ebc60 100644
--- a/src/read_restart.cpp
+++ b/src/read_restart.cpp
@@ -135,7 +135,7 @@ void ReadRestart::command(int narg, char **arg)
   domain->print_box("  ");
   domain->set_initial_box();
   domain->set_global_box();
-  comm->set_procs();
+  comm->set_proc_grid();
   domain->set_local_box();
 
   // read groups, ntype-length arrays, force field, fix info from file
diff --git a/src/replicate.cpp b/src/replicate.cpp
index da6a662069c94900b428a6418c6022490f0e16c1..ce53825f319c186ede357aac4159908614cc3102 100644
--- a/src/replicate.cpp
+++ b/src/replicate.cpp
@@ -193,7 +193,7 @@ void Replicate::command(int narg, char **arg)
   domain->print_box("  ");
   domain->set_initial_box();
   domain->set_global_box();
-  comm->set_procs();
+  comm->set_proc_grid();
   domain->set_local_box();
 
   // copy type arrays to new atom class
diff --git a/src/verlet_split.cpp b/src/verlet_split.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..39d7185800682bf3350642622fcd40156e146067
--- /dev/null
+++ b/src/verlet_split.cpp
@@ -0,0 +1,480 @@
+/* -------------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Yuxing Peng and Chris Knight (U Chicago)
+------------------------------------------------------------------------- */
+
+#include "string.h"
+#include "verlet_split.h"
+#include "universe.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "comm.h"
+#include "atom.h"
+#include "atom_vec.h"
+#include "force.h"
+#include "pair.h"
+#include "bond.h"
+#include "angle.h"
+#include "dihedral.h"
+#include "improper.h"
+#include "kspace.h"
+#include "output.h"
+#include "update.h"
+#include "modify.h"
+#include "timer.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+VerletSplit::VerletSplit(LAMMPS *lmp, int narg, char **arg) :
+  Verlet(lmp, narg, arg)
+{
+  // error checks on partitions
+
+  if (universe->nworlds != 2) 
+    error->universe_all(FLERR,"Verlet/split requires 2 partitions");
+  if (universe->procs_per_world[0] % universe->procs_per_world[1])
+    error->universe_all(FLERR,"Verlet/split requires Rspace partition "
+			"size be multiple of Kspace partition size");
+
+  // master = 1 for Rspace procs, 0 for Kspace procs
+
+  if (universe->iworld == 0) master = 1;
+  else master = 0;
+
+  ratio = universe->procs_per_world[0] / universe->procs_per_world[1];
+
+  // Kspace root proc broadcasts info about Kspace proc layout to Rspace procs
+
+  int kspace_procgrid[3];
+
+  if (universe->me == universe->root_proc[1]) {
+    kspace_procgrid[0] = comm->procgrid[0];
+    kspace_procgrid[1] = comm->procgrid[1];
+    kspace_procgrid[2] = comm->procgrid[2];
+  }
+  MPI_Bcast(kspace_procgrid,3,MPI_INT,universe->root_proc[1],universe->uworld);
+
+  int ***kspace_grid2proc;
+  memory->create(kspace_grid2proc,kspace_procgrid[0],
+		 kspace_procgrid[1],kspace_procgrid[2],
+		 "verlet/split:kspace_grid2proc");
+
+  if (universe->me == universe->root_proc[1]) {
+    for (int i = 0; i < comm->procgrid[0]; i++)
+      for (int j = 0; j < comm->procgrid[1]; j++)
+	for (int k = 0; k < comm->procgrid[2]; k++)
+	  kspace_grid2proc[i][j][k] = comm->grid2proc[i][j][k];
+  }
+  MPI_Bcast(&kspace_grid2proc[0][0][0],
+	    kspace_procgrid[0]*kspace_procgrid[1]*kspace_procgrid[2],MPI_INT,
+	    universe->root_proc[1],universe->uworld);
+
+  // Rspace partition must be multiple of Kspace partition in each dim
+  // so atoms of one Kspace proc coincide with atoms of several Rspace procs
+
+  if (master) {
+    int flag = 0;
+    if (comm->procgrid[0] % kspace_procgrid[0]) flag = 1;
+    if (comm->procgrid[1] % kspace_procgrid[1]) flag = 1;
+    if (comm->procgrid[2] % kspace_procgrid[2]) flag = 1;
+    if (flag)
+      error->one(FLERR,
+		 "Verlet/split requires Rspace partition layout be "
+		 "multiple of Kspace partition layout in each dim");
+  }
+
+  // block = 1 Kspace proc with set of Rspace procs it overlays
+  // me_block = 0 for Kspace proc
+  // me_block = 1 to ratio for Rspace procs
+  // block = MPI communicator for that set of procs
+
+  int iblock,key;
+
+  if (!master) {
+    iblock = comm->me;
+    key = 0;
+  } else {
+    int kpx = comm->myloc[0] / (comm->procgrid[0]/kspace_procgrid[0]);
+    int kpy = comm->myloc[1] / (comm->procgrid[1]/kspace_procgrid[1]);
+    int kpz = comm->myloc[2] / (comm->procgrid[2]/kspace_procgrid[2]);
+    iblock = kspace_grid2proc[kpx][kpy][kpz];
+    key = 1;
+  }
+
+  MPI_Comm_split(universe->uworld,iblock,key,&block);
+  MPI_Comm_rank(block,&me_block);
+
+  // output block groupings to universe screen/logfile
+  // bmap is ordered by block and then by proc within block
+
+  int *bmap = new int[universe->nprocs];
+  for (int i = 0; i < universe->nprocs; i++) bmap[i] = -1;
+  bmap[iblock*(ratio+1)+me_block] = universe->me;
+
+  int *bmapall = new int[universe->nprocs];
+  MPI_Allreduce(bmap,bmapall,universe->nprocs,MPI_INT,MPI_MAX,universe->uworld);
+
+  if (universe->me == 0) {
+    if (universe->uscreen) {
+      fprintf(universe->uscreen,"Rspace/Kspace procs in each block:\n");
+      int m = 0;
+      for (int i = 0; i < universe->nprocs/(ratio+1); i++) {
+	fprintf(universe->uscreen,"  block %d:",i);
+	int kspace_proc = bmapall[m++];
+	for (int j = 1; j <= ratio; j++)
+	  fprintf(universe->uscreen," %d",bmapall[m++]);
+	fprintf(universe->uscreen," %d\n",kspace_proc);
+      }
+    }
+    if (universe->ulogfile) {
+      fprintf(universe->ulogfile,"Rspace/Kspace procs in each block:\n");
+      int m = 0;
+      for (int i = 0; i < universe->nprocs/(ratio+1); i++) {
+	fprintf(universe->ulogfile,"  block %d:",i);
+	int kspace_proc = bmapall[m++];
+	for (int j = 1; j <= ratio; j++)
+	  fprintf(universe->ulogfile," %d",bmapall[m++]);
+	fprintf(universe->ulogfile," %d\n",kspace_proc);
+      }
+    }
+  }
+
+  memory->destroy(kspace_grid2proc);
+  delete [] bmap;
+  delete [] bmapall;
+
+  // size/disp = vectors for MPI gather/scatter within block
+
+  qsize = new int[ratio+1];
+  qdisp = new int[ratio+1];
+  xsize = new int[ratio+1];
+  xdisp = new int[ratio+1];
+
+  // f_kspace = Rspace copy of Kspace forces
+  // allocate dummy version for Kspace partition
+
+  maxatom = 0;
+  f_kspace = NULL;
+  if (!master) memory->create(f_kspace,1,1,"verlet/split:f_kspace");
+}
+
+/* ---------------------------------------------------------------------- */
+
+VerletSplit::~VerletSplit()
+{
+  delete [] qsize;
+  delete [] qdisp;
+  delete [] xsize;
+  delete [] xdisp;
+  memory->destroy(f_kspace);
+  MPI_Comm_free(&block);
+}
+
+/* ----------------------------------------------------------------------
+   initialization before run
+------------------------------------------------------------------------- */
+
+void VerletSplit::init()
+{
+  if (!force->kspace && comm->me == 0) 
+    error->warning(FLERR,"No Kspace calculation with verlet/split");
+
+  if (force->kspace_match("tip4p",0)) tip4p_flag = 1;
+  else tip4p_flag = 0;
+
+  Verlet::init();
+}
+
+/* ----------------------------------------------------------------------
+   run for N steps
+   master partition does everything but Kspace
+   servant partition does just Kspace
+   communicate back and forth every step:
+     atom coords from master -> servant
+     kspace forces from servant -> master
+     also box bounds from master -> servant if necessary
+------------------------------------------------------------------------- */
+
+void VerletSplit::run(int n)
+{
+  int nflag,ntimestep,sortflag;
+
+  // reset LOOP timer due to imbalance in setup() on 2 partitions
+  // setup initial Rspace <-> Kspace comm params
+
+  MPI_Barrier(universe->uworld);
+  timer->array[TIME_LOOP] = MPI_Wtime();
+  rk_setup();
+
+  // flags for timestepping iterations
+
+  int n_post_integrate = modify->n_post_integrate;
+  int n_pre_exchange = modify->n_pre_exchange;
+  int n_pre_neighbor = modify->n_pre_neighbor;
+  int n_pre_force = modify->n_pre_force;
+  int n_post_force = modify->n_post_force;
+  int n_end_of_step = modify->n_end_of_step;
+
+  if (atom->sortfreq > 0) sortflag = 1;
+  else sortflag = 0;
+
+  for (int i = 0; i < n; i++) {
+
+    ntimestep = ++update->ntimestep;
+    ev_set(ntimestep);
+
+    // initial time integration
+ 
+    if (master) {
+      modify->initial_integrate(vflag);
+      if (n_post_integrate) modify->post_integrate();
+    }
+
+    // regular communication vs neighbor list rebuild
+
+    if (master) nflag = neighbor->decide();
+    MPI_Bcast(&nflag,1,MPI_INT,1,block);
+
+    if (master) {
+      if (nflag == 0) {
+	timer->stamp();
+	comm->forward_comm();
+	timer->stamp(TIME_COMM);
+      } else {
+	if (n_pre_exchange) modify->pre_exchange();
+	if (triclinic) domain->x2lamda(atom->nlocal);
+	domain->pbc();
+	if (domain->box_change) {
+	  domain->reset_box();
+	  comm->setup();
+	  if (neighbor->style) neighbor->setup_bins();
+	}
+	timer->stamp();
+	comm->exchange();
+	if (sortflag && ntimestep >= atom->nextsort) atom->sort();
+	comm->borders();
+	if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
+	timer->stamp(TIME_COMM);
+	if (n_pre_neighbor) modify->pre_neighbor();
+	neighbor->build();
+	timer->stamp(TIME_NEIGHBOR);
+      }
+    }
+    
+    // if reneighboring occurred, re-setup Rspace <-> Kspace comm params
+    // comm Rspace atom coords to Kspace procs
+
+    if (nflag) rk_setup();
+    r2k_comm();
+  
+    // force computations
+
+    force_clear();
+
+    if (master) {
+      if (n_pre_force) modify->pre_force(vflag);
+
+      timer->stamp();
+      if (force->pair) {
+        force->pair->compute(eflag,vflag);
+        timer->stamp(TIME_PAIR);
+      }
+      
+      if (atom->molecular) {
+        if (force->bond) force->bond->compute(eflag,vflag);
+        if (force->angle) force->angle->compute(eflag,vflag);
+        if (force->dihedral) force->dihedral->compute(eflag,vflag);
+        if (force->improper) force->improper->compute(eflag,vflag);
+        timer->stamp(TIME_BOND);
+      }
+
+      if (force->newton) {
+	comm->reverse_comm();
+	timer->stamp(TIME_COMM);
+      }
+
+    } else {
+      if (force->kspace) {
+        timer->stamp();
+        force->kspace->compute(eflag,vflag);
+        timer->stamp(TIME_KSPACE);
+      }
+    }
+
+    // comm and sum Kspace forces back to Rspace procs
+
+    k2r_comm();
+
+    // force modifications, final time integration, diagnostics
+    // all output
+
+    if (master) {
+      if (n_post_force) modify->post_force(vflag);
+      modify->final_integrate();
+      if (n_end_of_step) modify->end_of_step();
+
+      if (ntimestep == output->next) {
+	timer->stamp();
+	output->write(ntimestep);
+	timer->stamp(TIME_OUTPUT);
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   setup params for Rspace <-> Kspace communication
+   called initially and after every reneighbor
+   also communcicate atom charges from Rspace to KSpace since static
+------------------------------------------------------------------------- */
+
+void VerletSplit::rk_setup()
+{
+  // grow f_kspace array on master procs if necessary
+
+  if (master) {
+    if (atom->nlocal > maxatom) {
+      memory->destroy(f_kspace);
+      maxatom = atom->nmax;
+      memory->create(f_kspace,maxatom,3,"verlet/split:f_kspace");
+    }
+  }
+
+  // qsize = # of atoms owned by each master proc in block
+
+  int n = 0;
+  if (master) n = atom->nlocal;
+  MPI_Gather(&n,1,MPI_INT,qsize,1,MPI_INT,0,block);
+
+  // setup qdisp, xsize, xdisp based on qsize
+  // only needed by Kspace proc
+  // set Kspace nlocal to sum of Rspace nlocals
+  // insure Kspace atom arrays are large enough
+
+  if (!master) { 
+    qsize[0] = qdisp[0] = xsize[0] = xdisp[0] = 0;
+    for (int i = 1; i <= ratio; i++) {
+      qdisp[i] = qdisp[i-1]+qsize[i-1];
+      xsize[i] = 3*qsize[i];
+      xdisp[i] = xdisp[i-1]+xsize[i-1];
+    }
+
+    atom->nlocal = qdisp[ratio] + qsize[ratio];
+    while (atom->nmax <= atom->nlocal) atom->avec->grow(0);
+    atom->nghost = 0;
+  }
+  
+  // one-time gather of Rspace atom charges to Kspace proc
+
+  MPI_Gatherv(atom->q,n,MPI_DOUBLE,atom->q,qsize,qdisp,MPI_DOUBLE,0,block);
+
+  // for TIP4P also need to send type and tag
+  // KSpace proc needs to acquire ghost atoms and map all its atoms
+
+  if (tip4p_flag) {
+    MPI_Gatherv(atom->type,n,MPI_INT,atom->type,qsize,qdisp,MPI_INT,0,block);
+    MPI_Gatherv(atom->tag,n,MPI_INT,atom->tag,qsize,qdisp,MPI_INT,0,block);
+    if (!master) {
+      if (triclinic) domain->x2lamda(atom->nlocal);
+      if (domain->box_change) comm->setup();
+      timer->stamp();
+      atom->map_clear();          // in lieu of comm->exchange()
+      comm->borders();
+      if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
+      timer->stamp(TIME_COMM);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   communicate Rspace atom coords to Kspace
+   also eflag,vflag and box bounds if needed
+------------------------------------------------------------------------- */
+
+void VerletSplit::r2k_comm()
+{
+  MPI_Status status;
+
+  int n = 0;
+  if (master) n = atom->nlocal;
+  MPI_Gatherv(atom->x[0],n*3,MPI_DOUBLE,atom->x[0],xsize,xdisp,
+	      MPI_DOUBLE,0,block);
+
+  // send eflag,vflag from Rspace to Kspace
+
+  if (me_block == 1) {
+    int flags[2];
+    flags[0] = eflag; flags[1] = vflag;
+    MPI_Send(flags,2,MPI_INT,0,0,block);
+  } else if (!master) {
+    int flags[2];
+    MPI_Recv(flags,2,MPI_DOUBLE,1,0,block,&status);
+    eflag = flags[0]; vflag = flags[1];
+  }
+
+  // send box bounds from Rspace to Kspace if simulation box is dynamic
+
+  if (domain->box_change) {
+    if (me_block == 1) {
+      MPI_Send(domain->boxlo,3,MPI_DOUBLE,0,0,block);
+      MPI_Send(domain->boxhi,3,MPI_DOUBLE,0,0,block);
+    } else if (!master) {
+      MPI_Recv(domain->boxlo,3,MPI_DOUBLE,1,0,block,&status);
+      MPI_Recv(domain->boxhi,3,MPI_DOUBLE,1,0,block,&status);
+      domain->set_global_box();
+      domain->set_local_box();
+      force->kspace->setup();
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   communicate and sum Kspace atom forces back to Rspace
+------------------------------------------------------------------------- */
+
+void VerletSplit::k2r_comm()
+{
+  if (eflag) MPI_Bcast(&force->kspace->energy,1,MPI_DOUBLE,0,block);
+  if (vflag) MPI_Bcast(force->kspace->virial,6,MPI_DOUBLE,0,block);
+
+  int n = 0;
+  if (master) n = atom->nlocal;
+  MPI_Scatterv(atom->f[0],xsize,xdisp,MPI_DOUBLE,
+	       f_kspace[0],n*3,MPI_DOUBLE,0,block);
+  
+  if (master) {
+    double **f = atom->f;
+    int nlocal = atom->nlocal;
+    for (int i = 0; i < nlocal; i++) {
+      f[i][0] += f_kspace[i][0];
+      f[i][1] += f_kspace[i][1];
+      f[i][2] += f_kspace[i][2];
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of Kspace force array on master procs
+------------------------------------------------------------------------- */
+
+bigint VerletSplit::memory_usage()
+{
+  bigint bytes = maxatom*3 * sizeof(double);
+  return bytes;
+}
diff --git a/src/verlet_split.h b/src/verlet_split.h
new file mode 100644
index 0000000000000000000000000000000000000000..080a6057eb1fcba7ccf352a6cc3f73d48f3a66a3
--- /dev/null
+++ b/src/verlet_split.h
@@ -0,0 +1,54 @@
+/* -------------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef INTEGRATE_CLASS
+
+IntegrateStyle(verlet/split,VerletSplit)
+
+#else
+
+#ifndef LMP_VERLET_SPLIT_H
+#define LMP_VERLET_SPLIT_H
+
+#include "verlet.h"
+
+namespace LAMMPS_NS {
+
+class VerletSplit : public Verlet {
+ public:
+  VerletSplit(class LAMMPS *, int, char **);
+  ~VerletSplit();
+  void init();
+  void run(int);
+  bigint memory_usage();
+
+ private:
+  int master;                        // 1 if an Rspace proc, 0 if Kspace
+  int me_block;                      // proc ID within Rspace/Kspace block
+  int ratio;                         // ratio of Rspace procs to Kspace procs
+  int *qsize,*qdisp,*xsize,*xdisp;   // MPI gather/scatter params for block comm
+  MPI_Comm block;                    // communicator within one block
+  int tip4p_flag;                    // 1 if PPPM/tip4p so do extra comm
+
+  double **f_kspace;                 // copy of Kspace forces on Rspace procs
+  int maxatom;
+  
+  void rk_setup();
+  void r2k_comm();
+  void k2r_comm();
+};
+
+}
+
+#endif
+#endif