From adb129fe991dc0554d27b0f8fb36a9cce76d30b6 Mon Sep 17 00:00:00 2001
From: sjplimp <sjplimp@f3b2605a-c512-4ea7-a41b-209d697bcdaa>
Date: Wed, 6 Mar 2013 16:44:43 +0000
Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@9581
 f3b2605a-c512-4ea7-a41b-209d697bcdaa

---
 src/USER-OMP/fix_omp.cpp | 49 ++++++++++++++++++----------------------
 src/USER-OMP/fix_omp.h   | 12 ++++++----
 src/USER-OMP/thr_omp.cpp | 11 +++++++--
 src/USER-OMP/thr_omp.h   |  4 ++--
 4 files changed, 41 insertions(+), 35 deletions(-)

diff --git a/src/USER-OMP/fix_omp.cpp b/src/USER-OMP/fix_omp.cpp
index e2aaab594c..bac001795d 100644
--- a/src/USER-OMP/fix_omp.cpp
+++ b/src/USER-OMP/fix_omp.cpp
@@ -68,7 +68,7 @@ static int get_tid()
 FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) 
   :  Fix(lmp, narg, arg),
      thr(NULL), last_omp_style(NULL), last_pair_hybrid(NULL),
-     _nthr(-1), _neighbor(true), _mixed(false)
+     _nthr(-1), _neighbor(true), _mixed(false), _reduced(true)
 {
   if ((narg < 4) || (narg > 7)) error->all(FLERR,"Illegal package omp command");
   if (strcmp(arg[1],"all") != 0) error->all(FLERR,"fix OMP has to operate on group 'all'");
@@ -87,8 +87,10 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
   if (nthreads < 1)
     error->all(FLERR,"Illegal number of OpenMP threads requested");
 
+  int reset_thr = 0;
   if (nthreads != comm->nthreads) {
 #if defined(_OPENMP)
+    reset_thr = 1;
     omp_set_num_threads(nthreads);
 #endif
     comm->nthreads = nthreads;
@@ -110,25 +112,21 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
 
   // print summary of settings
   if (comm->me == 0) {
-    const char * const nmode = _neighbor ? "OpenMP capable" : "serial";
+    const char * const nmode = _neighbor ? "multi-threaded" : "serial";
     const char * const kmode = _mixed ? "mixed" : "double";
 
     if (screen) {
-      fprintf(screen,"  reset %d OpenMP thread(s) per MPI task\n", nthreads);
-      fprintf(screen,"  using %s neighbor list subroutines\n", nmode);
-      if (_mixed)
-        fputs("  using mixed precision OpenMP force kernels where available\n", screen);
-      else
-        fputs("  using double precision OpenMP force kernels\n", screen);
+      if (reset_thr)
+	fprintf(screen,"set %d OpenMP thread(s) per MPI task\n", nthreads);
+      fprintf(screen,"using %s neighbor list subroutines\n", nmode);
+      fprintf(screen,"prefer %s precision OpenMP force kernels\n", kmode);
     }
     
     if (logfile) {
-      fprintf(logfile,"  reset %d OpenMP thread(s) per MPI task\n", nthreads);
-      fprintf(logfile,"  using %s neighbor list subroutines\n", nmode);
-      if (_mixed)
-        fputs("  using mixed precision OpenMP force kernels where available\n", logfile);
-      else
-        fputs("  using double precision OpenMP force kernels\n", logfile);
+      if (reset_thr)
+	fprintf(logfile,"set %d OpenMP thread(s) per MPI task\n", nthreads);
+      fprintf(logfile,"using %s neighbor list subroutines\n", nmode);
+      fprintf(logfile,"prefer %s precision OpenMP force kernels\n", kmode);
     }
   }
 
@@ -137,7 +135,6 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
   // encourage the OS to use storage that is "close" to each thread's CPU.
   thr = new ThrData *[nthreads];
   _nthr = nthreads;
-  _clearforce = true;
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
@@ -188,8 +185,9 @@ int FixOMP::setmask()
 
 void FixOMP::init()
 {
-  if (strstr(update->integrate_style,"respa") != NULL)
-    error->all(FLERR,"Cannot use r-RESPA with /omp styles");
+  if ((strstr(update->integrate_style,"respa") != NULL)
+      && (strstr(update->integrate_style,"respa/omp") == NULL))
+    error->all(FLERR,"Need to use respa/omp for r-RESPA with /omp styles");
 
   int check_hybrid, kspace_split;
   last_pair_hybrid = NULL;
@@ -282,7 +280,6 @@ void FixOMP::init()
         fprintf(logfile,"Last active /omp style is %s_style %s\n",
                 last_force_name, last_omp_name);
     } else {
-      _clearforce = false;
       if (screen)
         fprintf(screen,"No /omp style for force computation currently active\n");
       if (logfile)
@@ -322,18 +319,16 @@ void FixOMP::pre_force(int)
   double *de = atom->de;
   double *drho = atom->drho;
 
-  if (_clearforce) {
 #if defined(_OPENMP)
 #pragma omp parallel default(none) shared(f,torque,erforce,de,drho)
 #endif
-    {
-      const int tid = get_tid();
-      thr[tid]->check_tid(tid);
-      thr[tid]->init_force(nall,f,torque,erforce,de,drho);
-    }
-  } else {
-    thr[0]->init_force(nall,f,torque,erforce,de,drho);
-  }
+  {
+    const int tid = get_tid();
+    thr[tid]->check_tid(tid);
+    thr[tid]->init_force(nall,f,torque,erforce,de,drho);
+  } // end of omp parallel region
+
+  _reduced = false;
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/USER-OMP/fix_omp.h b/src/USER-OMP/fix_omp.h
index e651849984..656ab752a8 100644
--- a/src/USER-OMP/fix_omp.h
+++ b/src/USER-OMP/fix_omp.h
@@ -29,6 +29,7 @@ class ThrData;
 
 class FixOMP : public Fix {
   friend class ThrOMP;
+  friend class RespaOMP;
 
  public:
   FixOMP(class LAMMPS *, int, char **);
@@ -51,6 +52,8 @@ class FixOMP : public Fix {
                            // to do the general force reduction
   void *last_pair_hybrid;  // pointer to the pair style that needs
                            // to call virial_fdot_compute()
+  // signal that an /omp style did the force reduction. needed by respa/omp
+  void did_reduce() { _reduced = true;  }
 
  public:
   ThrData *get_thr(int tid) { return thr[tid];  }
@@ -58,12 +61,13 @@ class FixOMP : public Fix {
 
   bool get_neighbor() const { return _neighbor; }
   bool get_mixed()    const { return _mixed;    }
+  bool get_reduced()  const { return _reduced;  }
 
  private:
-  int  _nthr;     // number of currently active ThrData object
-  bool _neighbor; // en/disable threads for neighbor list construction
-  bool _clearforce; // whether to clear per thread data objects
-  bool _mixed;     // whether to use a mixed precision compute kernel
+  int  _nthr;       // number of currently active ThrData objects
+  bool _neighbor;   // en/disable threads for neighbor list construction
+  bool _mixed;      // whether to prefer mixed precision compute kernels
+  bool _reduced;    // whether forces have been reduced for this step
 
   void set_neighbor_omp();
 };
diff --git a/src/USER-OMP/thr_omp.cpp b/src/USER-OMP/thr_omp.cpp
index 5bfc496389..4aea630554 100644
--- a/src/USER-OMP/thr_omp.cpp
+++ b/src/USER-OMP/thr_omp.cpp
@@ -183,6 +183,7 @@ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag,
           // pair_style hybrid will compute fdotr for us
           // but we first need to reduce the forces
           data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid);
+	  fix->did_reduce();
           need_force_reduce = 0;
         }
       }
@@ -380,7 +381,11 @@ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag,
     break;
 
   case THR_KSPACE:
-    // nothing to do. XXX need to add support for per-atom info
+    // nothing to do. XXX may need to add support for per-atom info
+    break;
+
+  case THR_INTGR:
+    // nothing to do
     break;
 
   default:
@@ -389,8 +394,10 @@ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag,
   }
 
   if (style == fix->last_omp_style) {
-    if (need_force_reduce)
+    if (need_force_reduce) {
       data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid);
+      fix->did_reduce();
+    }
 
     if (lmp->atom->torque)
       data_reduce_thr(&(lmp->atom->torque[0][0]), nall, nthreads, 3, tid);
diff --git a/src/USER-OMP/thr_omp.h b/src/USER-OMP/thr_omp.h
index 587c24f38a..9348058d87 100644
--- a/src/USER-OMP/thr_omp.h
+++ b/src/USER-OMP/thr_omp.h
@@ -58,8 +58,8 @@ class ThrOMP {
 
   enum {THR_NONE=0,THR_PAIR=1,THR_BOND=1<<1,THR_ANGLE=1<<2,
         THR_DIHEDRAL=1<<3,THR_IMPROPER=1<<4,THR_KSPACE=1<<5,
-        THR_CHARMM=1<<6, /*THR_PROXY=1<<7,*/ THR_HYBRID=1<<8,
-        THR_FIX=1<<9};
+        THR_CHARMM=1<<6, /*THR_PROXY=1<<7,THR_HYBRID=1<<8, */
+        THR_FIX=1<<9,THR_INTGR=1<<10};
 
  protected:
   // extra ev_tally setup work for threaded styles
-- 
GitLab