diff --git a/src/dump.cpp b/src/dump.cpp index f8896c8fee91cc8638a7559df43780b32a632e5c..dde89024ff7604fed0457b13f53dae10c0f8e0c3 100644 --- a/src/dump.cpp +++ b/src/dump.cpp @@ -30,9 +30,12 @@ using namespace LAMMPS_NS; +#if defined(LMP_USE_LIBC_QSORT) // allocate space for static class variable - Dump *Dump::dumpptr; +#else +#include "mergesort.h" +#endif #define BIG 1.0e20 #define EPSILON 1.0e-6 @@ -82,7 +85,7 @@ Dump::Dump(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) buffer_flag = 0; padflag = 0; pbcflag = 0; - + maxbuf = maxids = maxsort = maxproc = 0; buf = bufsort = NULL; ids = idsort = NULL; @@ -168,13 +171,13 @@ Dump::~Dump() delete irregular; memory->destroy(sbuf); - + if (pbcflag) { memory->destroy(xpbc); memory->destroy(vpbc); memory->destroy(imagepbc); } - + if (multiproc) MPI_Comm_free(&clustercomm); // XTC style sets fp to NULL since it closes file in its destructor @@ -275,7 +278,7 @@ void Dump::init() } // preallocation for PBC copies if requested - + if (pbcflag && atom->nlocal > maxpbc) pbc_allocate(); } @@ -388,7 +391,7 @@ void Dump::write() atom->image = imagepbc; domain->pbc(); } - + // pack my data into buf // if sorting on IDs also request ID list from pack() // sort buf as needed @@ -689,6 +692,7 @@ void Dump::sort() index[idsort[i]-idlo] = i; } +#if defined(LMP_USE_LIBC_QSORT) if (!reorderflag) { dumpptr = this; for (i = 0; i < nme; i++) index[i] = i; @@ -696,6 +700,14 @@ void Dump::sort() else if (sortorder == ASCEND) qsort(index,nme,sizeof(int),bufcompare); else qsort(index,nme,sizeof(int),bufcompare_reverse); } +#else + if (!reorderflag) { + for (i = 0; i < nme; i++) index[i] = i; + if (sortcol == 0) merge_sort(index,nme,(void *)this,idcompare); + else if (sortorder == ASCEND) merge_sort(index,nme,(void *)this,bufcompare); + else merge_sort(index,nme,(void *)this,bufcompare_reverse); + } +#endif // reset buf size and maxbuf to largest of any post-sort nme values // this insures proc 0 can receive everyone's info @@ -716,6 +728,7 @@ void Dump::sort() memcpy(&buf[i*size_one],&bufsort[index[i]*size_one],nbytes); } +#if defined(LMP_USE_LIBC_QSORT) /* ---------------------------------------------------------------------- compare two atom IDs called via qsort() in sort() method @@ -776,6 +789,64 @@ int Dump::bufcompare_reverse(const void *pi, const void *pj) return 0; } +#else + +/* ---------------------------------------------------------------------- + compare two atom IDs + called via merge_sort() in sort() method +------------------------------------------------------------------------- */ +int Dump::idcompare(const int i, const int j, void *ptr) +{ + tagint *idsort = ((Dump *)ptr)->idsort; + if (idsort[i] < idsort[j]) return -1; + else if (idsort[i] > idsort[j]) return 1; + else return 0; +} + +/* ---------------------------------------------------------------------- + compare two buffer values with size_one stride + called via merge_sort() in sort() method + sort in ASCENDing order +------------------------------------------------------------------------- */ + +int Dump::bufcompare(const int i, const int j, void *ptr) +{ + Dump *dptr = (Dump *) ptr; + double *bufsort = dptr->bufsort; + const int size_one = dptr->size_one; + const int sortcolm1 = dptr->sortcolm1; + + const int ii=i*size_one + sortcolm1; + const int jj=j*size_one + sortcolm1; + + if (bufsort[ii] < bufsort[jj]) return -1; + else if (bufsort[ii] > bufsort[jj]) return 1; + else return 0; +} + +/* ---------------------------------------------------------------------- + compare two buffer values with size_one stride + called via merge_sort() in sort() method + sort in DESCENDing order +------------------------------------------------------------------------- */ + +int Dump::bufcompare_reverse(const int i, const int j, void *ptr) +{ + Dump *dptr = (Dump *) ptr; + double *bufsort = dptr->bufsort; + const int size_one = dptr->size_one; + const int sortcolm1 = dptr->sortcolm1; + + const int ii=i*size_one + sortcolm1; + const int jj=j*size_one + sortcolm1; + + if (bufsort[ii] < bufsort[jj]) return 1; + else if (bufsort[ii] > bufsort[jj]) return -1; + else return 0; +} + +#endif + /* ---------------------------------------------------------------------- process params common to all dumps here if unknown param, call modify_param specific to the dump diff --git a/src/dump.h b/src/dump.h index c4d93352013d50cb8f27878d71bfb3855e5969b8..3c1450854ecb4eb0df1cd6216c9b84931b91bb0f 100644 --- a/src/dump.h +++ b/src/dump.h @@ -33,9 +33,10 @@ class Dump : protected Pointers { int comm_forward; // size of forward communication (0 if none) int comm_reverse; // size of reverse communication (0 if none) +#if defined(LMP_USE_LIBC_QSORT) // static variable across all Dump objects - static Dump *dumpptr; // holds a ptr to Dump currently being used +#endif Dump(class LAMMPS *, int, char **); virtual ~Dump(); @@ -132,11 +133,17 @@ class Dump : protected Pointers { virtual int convert_string(int, double *) {return 0;} virtual void write_data(int, double *) = 0; void pbc_allocate(); - + void sort(); +#if defined(LMP_USE_LIBC_QSORT) static int idcompare(const void *, const void *); static int bufcompare(const void *, const void *); static int bufcompare_reverse(const void *, const void *); +#else + static int idcompare(const int, const int, void *); + static int bufcompare(const int, const int, void *); + static int bufcompare_reverse(const int, const int, void *); +#endif }; } diff --git a/src/irregular.cpp b/src/irregular.cpp index d0210244fbd830b1f4d493c23af37907a8b3f4f4..74fdb6f772420776f28d1d57b33af5e23809b354 100644 --- a/src/irregular.cpp +++ b/src/irregular.cpp @@ -23,11 +23,20 @@ using namespace LAMMPS_NS; +#if defined(LMP_USE_LIBC_QSORT) // allocate space for static class variable // prototype for non-class function int *Irregular::proc_recv_copy; -int compare_standalone(const void *, const void *); +static int compare_standalone(const void *, const void *); + +#else + +#include "mergesort.h" + +// prototype for non-class function +static int compare_standalone(const int, const int, void *); +#endif enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files @@ -423,8 +432,13 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag) int *length_recv_ordered = new int[nrecv_proc]; for (i = 0; i < nrecv_proc; i++) order[i] = i; + +#if defined(LMP_USE_LIBC_QSORT) proc_recv_copy = proc_recv; qsort(order,nrecv_proc,sizeof(int),compare_standalone); +#else + merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone); +#endif int j; for (i = 0; i < nrecv_proc; i++) { @@ -450,6 +464,8 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag) return nrecvsize; } +#if defined(LMP_USE_LIBC_QSORT) + /* ---------------------------------------------------------------------- comparison function invoked by qsort() accesses static class member proc_recv_copy, set before call to qsort() @@ -465,6 +481,23 @@ int compare_standalone(const void *iptr, const void *jptr) return 0; } +#else + +/* ---------------------------------------------------------------------- + comparison function invoked by merge_sort() + void pointer contains proc_recv list; +------------------------------------------------------------------------- */ + +int compare_standalone(const int i, const int j, void *ptr) +{ + int *proc_recv = (int *) ptr; + if (proc_recv[i] < proc_recv[j]) return -1; + if (proc_recv[i] > proc_recv[j]) return 1; + return 0; +} + +#endif + /* ---------------------------------------------------------------------- communicate atoms via PlanAtom sendbuf = list of atoms to send @@ -671,9 +704,13 @@ int Irregular::create_data(int n, int *proclist, int sortflag) int *num_recv_ordered = new int[nrecv_proc]; for (i = 0; i < nrecv_proc; i++) order[i] = i; + +#if defined(LMP_USE_LIBC_QSORT) proc_recv_copy = proc_recv; qsort(order,nrecv_proc,sizeof(int),compare_standalone); - +#else + merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone); +#endif int j; for (i = 0; i < nrecv_proc; i++) { j = order[i]; diff --git a/src/irregular.h b/src/irregular.h index ea0fee2eb83c4c3d0002633a362292d36a92757e..5b2a7718472dc6d35d8af16eaaf360cae77d6f33 100644 --- a/src/irregular.h +++ b/src/irregular.h @@ -21,9 +21,11 @@ namespace LAMMPS_NS { class Irregular : protected Pointers { public: +#if defined(LMP_USE_LIBC_QSORT) // static variable across all Irregular objects, for qsort callback static int *proc_recv_copy; +#endif Irregular(class LAMMPS *); ~Irregular(); diff --git a/src/mergesort.h b/src/mergesort.h new file mode 100644 index 0000000000000000000000000000000000000000..1df6cb4b819375e68fbdd2c745a4c7cee4a49009 --- /dev/null +++ b/src/mergesort.h @@ -0,0 +1,120 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef LMP_MERGESORT +#define LMP_MERGESORT + +#include <string.h> + +// custom hybrid upward merge sort implementation with support to pass +// an opaque pointer to the comparison function, e.g. for access to +// class members. this avoids having to use global variables. +// for improved performance, we employ an in-place insertion sort on +// chunks of up to 64 elements and switch to merge sort from then on. + +// part 1. insertion sort for pre-sorting of small chunks + +static void insertion_sort(int *index, int num, void *ptr, + int (*comp)(int, int, void*)) +{ + if (num < 2) return; + for (int i=1; i < num; ++i) { + int tmp = index[i]; + for (int j=i-1; j >= 0; --j) { + if ((*comp)(index[j],tmp,ptr) > 0) { + index[j+1] = index[j]; + } else { + index[j+1] = tmp; + break; + } + if (j == 0) index[0] = tmp; + } + } +} + +// part 2. merge two sublists + +static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi, + void *ptr, int (*comp)(int, int, void *)) +{ + int i = llo; + int l = llo; + int r = rlo; + while ((l < lhi) && (r < rhi)) { + if ((*comp)(buf[l],buf[r],ptr) < 0) + idx[i++] = buf[l++]; + else idx[i++] = buf[r++]; + } + + while (l < lhi) idx[i++] = buf[l++]; + while (r < rhi) idx[i++] = buf[r++]; +} + +// part 3: loop over sublists doubling in size with each iteration. +// pre-sort sublists with insertion sort for better performance. + +static void merge_sort(int *index, int num, void *ptr, + int (*comp)(int, int, void *)) +{ + if (num < 2) return; + + int chunk,i,j; + + // do insertion sort on chunks of up to 64 elements + + chunk = 64; + for (i=0; i < num; i += chunk) { + j = (i+chunk > num) ? num-i : chunk; + insertion_sort(index+i,j,ptr,comp); + } + + // already done? + + if (chunk >= num) return; + + // continue with merge sort on the pre-sorted chunks. + // we need an extra buffer for temporary storage and two + // pointers to operate on, so we can swap the pointers + // rather than copying to the hold buffer in each pass + + int *buf = new int[num]; + int *dest = index; + int *hold = buf; + + while (chunk < num) { + int m; + + // swap hold and destination buffer + + int *tmp = dest; dest = hold; hold = tmp; + + // merge from hold array to destiation array + + for (i=0; i < num-1; i += 2*chunk) { + j = i + 2*chunk; + if (j > num) j=num; + m = i+chunk; + if (m > num) m=num; + do_merge(dest,hold,i,m,m,j,ptr,comp); + } + chunk *= 2; + } + + // if the final sorted data is in buf, copy back to index + + if (dest == buf) memcpy(index,buf,sizeof(int)*num); + + delete[] buf; +} + +#endif