From c1b0b1b3f9d7e21ee991aaf554edce6a328cc21e Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 16 Jun 2017 18:17:48 -0400
Subject: [PATCH] restore old qsort() based code and add preprocessor
 directives to switch

-DLMP_USE_LIBC_QSORT will use qsort() from libc to sort (requires static/global variables).
-DLMP_USE_MERGE_SORT will use a plain merge sort. slightly slower for expensive comparisons.
-DLMP_USE_HYBRID_SORT will use hybrid merge sort. faster than merge sort (no static/global variables)
---
 src/dump.cpp      | 85 +++++++++++++++++++++++++++++++++++++++++++++--
 src/dump.h        | 11 ++++++
 src/irregular.cpp | 47 ++++++++++++++++++++++++--
 src/irregular.h   |  6 ++++
 src/mergesort.h   | 22 +++++++++---
 5 files changed, 162 insertions(+), 9 deletions(-)

diff --git a/src/dump.cpp b/src/dump.cpp
index 88711deff4..dde89024ff 100644
--- a/src/dump.cpp
+++ b/src/dump.cpp
@@ -30,7 +30,12 @@
 
 using namespace LAMMPS_NS;
 
+#if defined(LMP_USE_LIBC_QSORT)
+// allocate space for static class variable
+Dump *Dump::dumpptr;
+#else
 #include "mergesort.h"
+#endif
 
 #define BIG 1.0e20
 #define EPSILON 1.0e-6
@@ -687,12 +692,22 @@ void Dump::sort()
         index[idsort[i]-idlo] = i;
   }
 
+#if defined(LMP_USE_LIBC_QSORT)
+  if (!reorderflag) {
+    dumpptr = this;
+    for (i = 0; i < nme; i++) index[i] = i;
+    if (sortcol == 0) qsort(index,nme,sizeof(int),idcompare);
+    else if (sortorder == ASCEND) qsort(index,nme,sizeof(int),bufcompare);
+    else qsort(index,nme,sizeof(int),bufcompare_reverse);
+  }
+#else
   if (!reorderflag) {
     for (i = 0; i < nme; i++) index[i] = i;
-    if (sortcol==0) merge_sort(index,nme,(void *)this,idcompare);
-    else if (sortorder==ASCEND) merge_sort(index,nme,(void *)this,bufcompare);
+    if (sortcol == 0) merge_sort(index,nme,(void *)this,idcompare);
+    else if (sortorder == ASCEND) merge_sort(index,nme,(void *)this,bufcompare);
     else merge_sort(index,nme,(void *)this,bufcompare_reverse);
   }
+#endif
 
   // reset buf size and maxbuf to largest of any post-sort nme values
   // this insures proc 0 can receive everyone's info
@@ -713,6 +728,69 @@ void Dump::sort()
     memcpy(&buf[i*size_one],&bufsort[index[i]*size_one],nbytes);
 }
 
+#if defined(LMP_USE_LIBC_QSORT)
+/* ----------------------------------------------------------------------
+   compare two atom IDs
+   called via qsort() in sort() method
+   is a static method so access data via dumpptr
+------------------------------------------------------------------------- */
+
+int Dump::idcompare(const void *pi, const void *pj)
+{
+  tagint *idsort = dumpptr->idsort;
+
+  int i = *((int *) pi);
+  int j = *((int *) pj);
+
+  if (idsort[i] < idsort[j]) return -1;
+  if (idsort[i] > idsort[j]) return 1;
+  return 0;
+}
+
+/* ----------------------------------------------------------------------
+   compare two buffer values with size_one stride
+   called via qsort() in sort() method
+   is a static method so access data via dumpptr
+   sort in ASCENDing order
+------------------------------------------------------------------------- */
+
+int Dump::bufcompare(const void *pi, const void *pj)
+{
+  double *bufsort = dumpptr->bufsort;
+  int size_one = dumpptr->size_one;
+  int sortcolm1 = dumpptr->sortcolm1;
+
+  int i = *((int *) pi)*size_one + sortcolm1;
+  int j = *((int *) pj)*size_one + sortcolm1;
+
+  if (bufsort[i] < bufsort[j]) return -1;
+  if (bufsort[i] > bufsort[j]) return 1;
+  return 0;
+}
+
+/* ----------------------------------------------------------------------
+   compare two buffer values with size_one stride
+   called via qsort() in sort() method
+   is a static method so access data via dumpptr
+   sort in DESCENDing order
+------------------------------------------------------------------------- */
+
+int Dump::bufcompare_reverse(const void *pi, const void *pj)
+{
+  double *bufsort = dumpptr->bufsort;
+  int size_one = dumpptr->size_one;
+  int sortcolm1 = dumpptr->sortcolm1;
+
+  int i = *((int *) pi)*size_one + sortcolm1;
+  int j = *((int *) pj)*size_one + sortcolm1;
+
+  if (bufsort[i] > bufsort[j]) return -1;
+  if (bufsort[i] < bufsort[j]) return 1;
+  return 0;
+}
+
+#else
+
 /* ----------------------------------------------------------------------
    compare two atom IDs
    called via merge_sort() in sort() method
@@ -745,6 +823,7 @@ int Dump::bufcompare(const int i, const int j, void *ptr)
   else if (bufsort[ii] > bufsort[jj]) return 1;
   else return 0;
 }
+
 /* ----------------------------------------------------------------------
    compare two buffer values with size_one stride
    called via merge_sort() in sort() method
@@ -766,6 +845,8 @@ int Dump::bufcompare_reverse(const int i, const int j, void *ptr)
   else return 0;
 }
 
+#endif
+
 /* ----------------------------------------------------------------------
    process params common to all dumps here
    if unknown param, call modify_param specific to the dump
diff --git a/src/dump.h b/src/dump.h
index ae83d75d20..3c1450854e 100644
--- a/src/dump.h
+++ b/src/dump.h
@@ -33,6 +33,11 @@ class Dump : protected Pointers {
   int comm_forward;          // size of forward communication (0 if none)
   int comm_reverse;          // size of reverse communication (0 if none)
 
+#if defined(LMP_USE_LIBC_QSORT)
+  // static variable across all Dump objects
+  static Dump *dumpptr;         // holds a ptr to Dump currently being used
+#endif
+
   Dump(class LAMMPS *, int, char **);
   virtual ~Dump();
   void init();
@@ -130,9 +135,15 @@ class Dump : protected Pointers {
   void pbc_allocate();
 
   void sort();
+#if defined(LMP_USE_LIBC_QSORT)
+  static int idcompare(const void *, const void *);
+  static int bufcompare(const void *, const void *);
+  static int bufcompare_reverse(const void *, const void *);
+#else
   static int idcompare(const int, const int, void *);
   static int bufcompare(const int, const int, void *);
   static int bufcompare_reverse(const int, const int, void *);
+#endif
 };
 
 }
diff --git a/src/irregular.cpp b/src/irregular.cpp
index 94882ee65a..74fdb6f772 100644
--- a/src/irregular.cpp
+++ b/src/irregular.cpp
@@ -21,13 +21,22 @@
 #include "comm.h"
 #include "memory.h"
 
-#include "mergesort.h"
-
 using namespace LAMMPS_NS;
 
+#if defined(LMP_USE_LIBC_QSORT)
+// allocate space for static class variable
 // prototype for non-class function
 
+int *Irregular::proc_recv_copy;
+static int compare_standalone(const void *, const void *);
+
+#else
+
+#include "mergesort.h"
+
+// prototype for non-class function
 static int compare_standalone(const int, const int, void *);
+#endif
 
 enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};    // several files
 
@@ -423,7 +432,13 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
     int *length_recv_ordered = new int[nrecv_proc];
 
     for (i = 0; i < nrecv_proc; i++) order[i] = i;
+
+#if defined(LMP_USE_LIBC_QSORT)
+    proc_recv_copy = proc_recv;
+    qsort(order,nrecv_proc,sizeof(int),compare_standalone);
+#else
     merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
+#endif
 
     int j;
     for (i = 0; i < nrecv_proc; i++) {
@@ -449,6 +464,25 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
   return nrecvsize;
 }
 
+#if defined(LMP_USE_LIBC_QSORT)
+
+/* ----------------------------------------------------------------------
+   comparison function invoked by qsort()
+   accesses static class member proc_recv_copy, set before call to qsort()
+------------------------------------------------------------------------- */
+
+int compare_standalone(const void *iptr, const void *jptr)
+{
+  int i = *((int *) iptr);
+  int j = *((int *) jptr);
+  int *proc_recv = Irregular::proc_recv_copy;
+  if (proc_recv[i] < proc_recv[j]) return -1;
+  if (proc_recv[i] > proc_recv[j]) return 1;
+  return 0;
+}
+
+#else
+
 /* ----------------------------------------------------------------------
    comparison function invoked by merge_sort()
    void pointer contains proc_recv list;
@@ -462,6 +496,8 @@ int compare_standalone(const int i, const int j, void *ptr)
   return 0;
 }
 
+#endif
+
 /* ----------------------------------------------------------------------
    communicate atoms via PlanAtom
    sendbuf = list of atoms to send
@@ -668,8 +704,13 @@ int Irregular::create_data(int n, int *proclist, int sortflag)
     int *num_recv_ordered = new int[nrecv_proc];
 
     for (i = 0; i < nrecv_proc; i++) order[i] = i;
-    merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
 
+#if defined(LMP_USE_LIBC_QSORT)
+    proc_recv_copy = proc_recv;
+    qsort(order,nrecv_proc,sizeof(int),compare_standalone);
+#else
+    merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
+#endif
     int j;
     for (i = 0; i < nrecv_proc; i++) {
       j = order[i];
diff --git a/src/irregular.h b/src/irregular.h
index f759b4e698..5b2a771847 100644
--- a/src/irregular.h
+++ b/src/irregular.h
@@ -21,6 +21,12 @@ namespace LAMMPS_NS {
 class Irregular : protected Pointers {
  public:
 
+#if defined(LMP_USE_LIBC_QSORT)
+  // static variable across all Irregular objects, for qsort callback
+
+  static int *proc_recv_copy;
+#endif
+
   Irregular(class LAMMPS *);
   ~Irregular();
   void migrate_atoms(int sortflag = 0, int preassign = 0,
diff --git a/src/mergesort.h b/src/mergesort.h
index baa6b7830b..3597db52fb 100644
--- a/src/mergesort.h
+++ b/src/mergesort.h
@@ -20,8 +20,15 @@
 // an opaque pointer to the comparison function, e.g. for access to
 // class members. this avoids having to use global variables.
 // for improved performance, we employ an in-place insertion sort on
-// chunks of up to 32 elements and switch to merge sort from then on.
+// chunks of up to 64 elements and switch to merge sort from then on.
 
+// compile with -DLMP_USE_MERGE_SORT to switch to a plain merge sort
+
+#if !defined(LMP_USE_MERGE_SORT) && !defined(LMP_USE_HYBRID_SORT)
+#define LMP_USE_HYBRID_SORT
+#endif
+
+#if defined(LMP_USE_HYBRID_SORT)
 // part 1. insertion sort for pre-sorting of small chunks
 
 static void insertion_sort(int *index, int num, void *ptr,
@@ -41,6 +48,7 @@ static void insertion_sort(int *index, int num, void *ptr,
     }
   }
 }
+#endif
 
 // part 2. merge two sublists
 
@@ -55,7 +63,7 @@ static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi,
       idx[i++] = buf[l++];
     else idx[i++] = buf[r++];
   }
-    
+
   while (l < lhi) idx[i++] = buf[l++];
   while (r < rhi) idx[i++] = buf[r++];
 }
@@ -70,9 +78,11 @@ static void merge_sort(int *index, int num, void *ptr,
 
   int chunk,i,j;
 
-  // do insertion sort on chunks of up to 32 elements
+#if defined(LMP_USE_HYBRID_SORT)
+
+  // do insertion sort on chunks of up to 64 elements
 
-  chunk = 32;
+  chunk = 64;
   for (i=0; i < num; i += chunk) {
     j = (i+chunk > num) ? num-i : chunk;
     insertion_sort(index+i,j,ptr,comp);
@@ -82,6 +92,10 @@ static void merge_sort(int *index, int num, void *ptr,
 
   if (chunk >= num) return;
 
+#else
+  chunk = 1;
+#endif
+
   // continue with merge sort on the pre-sorted chunks.
   // we need an extra buffer for temporary storage and two
   // pointers to operate on, so we can swap the pointers
-- 
GitLab