From c1b0b1b3f9d7e21ee991aaf554edce6a328cc21e Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer <akohlmey@gmail.com> Date: Fri, 16 Jun 2017 18:17:48 -0400 Subject: [PATCH] restore old qsort() based code and add preprocessor directives to switch -DLMP_USE_LIBC_QSORT will use qsort() from libc to sort (requires static/global variables). -DLMP_USE_MERGE_SORT will use a plain merge sort. slightly slower for expensive comparisons. -DLMP_USE_HYBRID_SORT will use hybrid merge sort. faster than merge sort (no static/global variables) --- src/dump.cpp | 85 +++++++++++++++++++++++++++++++++++++++++++++-- src/dump.h | 11 ++++++ src/irregular.cpp | 47 ++++++++++++++++++++++++-- src/irregular.h | 6 ++++ src/mergesort.h | 22 +++++++++--- 5 files changed, 162 insertions(+), 9 deletions(-) diff --git a/src/dump.cpp b/src/dump.cpp index 88711deff4..dde89024ff 100644 --- a/src/dump.cpp +++ b/src/dump.cpp @@ -30,7 +30,12 @@ using namespace LAMMPS_NS; +#if defined(LMP_USE_LIBC_QSORT) +// allocate space for static class variable +Dump *Dump::dumpptr; +#else #include "mergesort.h" +#endif #define BIG 1.0e20 #define EPSILON 1.0e-6 @@ -687,12 +692,22 @@ void Dump::sort() index[idsort[i]-idlo] = i; } +#if defined(LMP_USE_LIBC_QSORT) + if (!reorderflag) { + dumpptr = this; + for (i = 0; i < nme; i++) index[i] = i; + if (sortcol == 0) qsort(index,nme,sizeof(int),idcompare); + else if (sortorder == ASCEND) qsort(index,nme,sizeof(int),bufcompare); + else qsort(index,nme,sizeof(int),bufcompare_reverse); + } +#else if (!reorderflag) { for (i = 0; i < nme; i++) index[i] = i; - if (sortcol==0) merge_sort(index,nme,(void *)this,idcompare); - else if (sortorder==ASCEND) merge_sort(index,nme,(void *)this,bufcompare); + if (sortcol == 0) merge_sort(index,nme,(void *)this,idcompare); + else if (sortorder == ASCEND) merge_sort(index,nme,(void *)this,bufcompare); else merge_sort(index,nme,(void *)this,bufcompare_reverse); } +#endif // reset buf size and maxbuf to largest of any post-sort nme values // this insures proc 0 can receive everyone's info @@ -713,6 +728,69 @@ void Dump::sort() memcpy(&buf[i*size_one],&bufsort[index[i]*size_one],nbytes); } +#if defined(LMP_USE_LIBC_QSORT) +/* ---------------------------------------------------------------------- + compare two atom IDs + called via qsort() in sort() method + is a static method so access data via dumpptr +------------------------------------------------------------------------- */ + +int Dump::idcompare(const void *pi, const void *pj) +{ + tagint *idsort = dumpptr->idsort; + + int i = *((int *) pi); + int j = *((int *) pj); + + if (idsort[i] < idsort[j]) return -1; + if (idsort[i] > idsort[j]) return 1; + return 0; +} + +/* ---------------------------------------------------------------------- + compare two buffer values with size_one stride + called via qsort() in sort() method + is a static method so access data via dumpptr + sort in ASCENDing order +------------------------------------------------------------------------- */ + +int Dump::bufcompare(const void *pi, const void *pj) +{ + double *bufsort = dumpptr->bufsort; + int size_one = dumpptr->size_one; + int sortcolm1 = dumpptr->sortcolm1; + + int i = *((int *) pi)*size_one + sortcolm1; + int j = *((int *) pj)*size_one + sortcolm1; + + if (bufsort[i] < bufsort[j]) return -1; + if (bufsort[i] > bufsort[j]) return 1; + return 0; +} + +/* ---------------------------------------------------------------------- + compare two buffer values with size_one stride + called via qsort() in sort() method + is a static method so access data via dumpptr + sort in DESCENDing order +------------------------------------------------------------------------- */ + +int Dump::bufcompare_reverse(const void *pi, const void *pj) +{ + double *bufsort = dumpptr->bufsort; + int size_one = dumpptr->size_one; + int sortcolm1 = dumpptr->sortcolm1; + + int i = *((int *) pi)*size_one + sortcolm1; + int j = *((int *) pj)*size_one + sortcolm1; + + if (bufsort[i] > bufsort[j]) return -1; + if (bufsort[i] < bufsort[j]) return 1; + return 0; +} + +#else + /* ---------------------------------------------------------------------- compare two atom IDs called via merge_sort() in sort() method @@ -745,6 +823,7 @@ int Dump::bufcompare(const int i, const int j, void *ptr) else if (bufsort[ii] > bufsort[jj]) return 1; else return 0; } + /* ---------------------------------------------------------------------- compare two buffer values with size_one stride called via merge_sort() in sort() method @@ -766,6 +845,8 @@ int Dump::bufcompare_reverse(const int i, const int j, void *ptr) else return 0; } +#endif + /* ---------------------------------------------------------------------- process params common to all dumps here if unknown param, call modify_param specific to the dump diff --git a/src/dump.h b/src/dump.h index ae83d75d20..3c1450854e 100644 --- a/src/dump.h +++ b/src/dump.h @@ -33,6 +33,11 @@ class Dump : protected Pointers { int comm_forward; // size of forward communication (0 if none) int comm_reverse; // size of reverse communication (0 if none) +#if defined(LMP_USE_LIBC_QSORT) + // static variable across all Dump objects + static Dump *dumpptr; // holds a ptr to Dump currently being used +#endif + Dump(class LAMMPS *, int, char **); virtual ~Dump(); void init(); @@ -130,9 +135,15 @@ class Dump : protected Pointers { void pbc_allocate(); void sort(); +#if defined(LMP_USE_LIBC_QSORT) + static int idcompare(const void *, const void *); + static int bufcompare(const void *, const void *); + static int bufcompare_reverse(const void *, const void *); +#else static int idcompare(const int, const int, void *); static int bufcompare(const int, const int, void *); static int bufcompare_reverse(const int, const int, void *); +#endif }; } diff --git a/src/irregular.cpp b/src/irregular.cpp index 94882ee65a..74fdb6f772 100644 --- a/src/irregular.cpp +++ b/src/irregular.cpp @@ -21,13 +21,22 @@ #include "comm.h" #include "memory.h" -#include "mergesort.h" - using namespace LAMMPS_NS; +#if defined(LMP_USE_LIBC_QSORT) +// allocate space for static class variable // prototype for non-class function +int *Irregular::proc_recv_copy; +static int compare_standalone(const void *, const void *); + +#else + +#include "mergesort.h" + +// prototype for non-class function static int compare_standalone(const int, const int, void *); +#endif enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files @@ -423,7 +432,13 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag) int *length_recv_ordered = new int[nrecv_proc]; for (i = 0; i < nrecv_proc; i++) order[i] = i; + +#if defined(LMP_USE_LIBC_QSORT) + proc_recv_copy = proc_recv; + qsort(order,nrecv_proc,sizeof(int),compare_standalone); +#else merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone); +#endif int j; for (i = 0; i < nrecv_proc; i++) { @@ -449,6 +464,25 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag) return nrecvsize; } +#if defined(LMP_USE_LIBC_QSORT) + +/* ---------------------------------------------------------------------- + comparison function invoked by qsort() + accesses static class member proc_recv_copy, set before call to qsort() +------------------------------------------------------------------------- */ + +int compare_standalone(const void *iptr, const void *jptr) +{ + int i = *((int *) iptr); + int j = *((int *) jptr); + int *proc_recv = Irregular::proc_recv_copy; + if (proc_recv[i] < proc_recv[j]) return -1; + if (proc_recv[i] > proc_recv[j]) return 1; + return 0; +} + +#else + /* ---------------------------------------------------------------------- comparison function invoked by merge_sort() void pointer contains proc_recv list; @@ -462,6 +496,8 @@ int compare_standalone(const int i, const int j, void *ptr) return 0; } +#endif + /* ---------------------------------------------------------------------- communicate atoms via PlanAtom sendbuf = list of atoms to send @@ -668,8 +704,13 @@ int Irregular::create_data(int n, int *proclist, int sortflag) int *num_recv_ordered = new int[nrecv_proc]; for (i = 0; i < nrecv_proc; i++) order[i] = i; - merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone); +#if defined(LMP_USE_LIBC_QSORT) + proc_recv_copy = proc_recv; + qsort(order,nrecv_proc,sizeof(int),compare_standalone); +#else + merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone); +#endif int j; for (i = 0; i < nrecv_proc; i++) { j = order[i]; diff --git a/src/irregular.h b/src/irregular.h index f759b4e698..5b2a771847 100644 --- a/src/irregular.h +++ b/src/irregular.h @@ -21,6 +21,12 @@ namespace LAMMPS_NS { class Irregular : protected Pointers { public: +#if defined(LMP_USE_LIBC_QSORT) + // static variable across all Irregular objects, for qsort callback + + static int *proc_recv_copy; +#endif + Irregular(class LAMMPS *); ~Irregular(); void migrate_atoms(int sortflag = 0, int preassign = 0, diff --git a/src/mergesort.h b/src/mergesort.h index baa6b7830b..3597db52fb 100644 --- a/src/mergesort.h +++ b/src/mergesort.h @@ -20,8 +20,15 @@ // an opaque pointer to the comparison function, e.g. for access to // class members. this avoids having to use global variables. // for improved performance, we employ an in-place insertion sort on -// chunks of up to 32 elements and switch to merge sort from then on. +// chunks of up to 64 elements and switch to merge sort from then on. +// compile with -DLMP_USE_MERGE_SORT to switch to a plain merge sort + +#if !defined(LMP_USE_MERGE_SORT) && !defined(LMP_USE_HYBRID_SORT) +#define LMP_USE_HYBRID_SORT +#endif + +#if defined(LMP_USE_HYBRID_SORT) // part 1. insertion sort for pre-sorting of small chunks static void insertion_sort(int *index, int num, void *ptr, @@ -41,6 +48,7 @@ static void insertion_sort(int *index, int num, void *ptr, } } } +#endif // part 2. merge two sublists @@ -55,7 +63,7 @@ static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi, idx[i++] = buf[l++]; else idx[i++] = buf[r++]; } - + while (l < lhi) idx[i++] = buf[l++]; while (r < rhi) idx[i++] = buf[r++]; } @@ -70,9 +78,11 @@ static void merge_sort(int *index, int num, void *ptr, int chunk,i,j; - // do insertion sort on chunks of up to 32 elements +#if defined(LMP_USE_HYBRID_SORT) + + // do insertion sort on chunks of up to 64 elements - chunk = 32; + chunk = 64; for (i=0; i < num; i += chunk) { j = (i+chunk > num) ? num-i : chunk; insertion_sort(index+i,j,ptr,comp); @@ -82,6 +92,10 @@ static void merge_sort(int *index, int num, void *ptr, if (chunk >= num) return; +#else + chunk = 1; +#endif + // continue with merge sort on the pre-sorted chunks. // we need an extra buffer for temporary storage and two // pointers to operate on, so we can swap the pointers -- GitLab