diff --git a/src/dump.cpp b/src/dump.cpp
index f8896c8fee91cc8638a7559df43780b32a632e5c..dde89024ff7604fed0457b13f53dae10c0f8e0c3 100644
--- a/src/dump.cpp
+++ b/src/dump.cpp
@@ -30,9 +30,12 @@
 
 using namespace LAMMPS_NS;
 
+#if defined(LMP_USE_LIBC_QSORT)
 // allocate space for static class variable
-
 Dump *Dump::dumpptr;
+#else
+#include "mergesort.h"
+#endif
 
 #define BIG 1.0e20
 #define EPSILON 1.0e-6
@@ -82,7 +85,7 @@ Dump::Dump(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
   buffer_flag = 0;
   padflag = 0;
   pbcflag = 0;
-  
+
   maxbuf = maxids = maxsort = maxproc = 0;
   buf = bufsort = NULL;
   ids = idsort = NULL;
@@ -168,13 +171,13 @@ Dump::~Dump()
   delete irregular;
 
   memory->destroy(sbuf);
-  
+
   if (pbcflag) {
     memory->destroy(xpbc);
     memory->destroy(vpbc);
     memory->destroy(imagepbc);
   }
-  
+
   if (multiproc) MPI_Comm_free(&clustercomm);
 
   // XTC style sets fp to NULL since it closes file in its destructor
@@ -275,7 +278,7 @@ void Dump::init()
   }
 
   // preallocation for PBC copies if requested
-  
+
   if (pbcflag && atom->nlocal > maxpbc) pbc_allocate();
 }
 
@@ -388,7 +391,7 @@ void Dump::write()
     atom->image = imagepbc;
     domain->pbc();
   }
-  
+
   // pack my data into buf
   // if sorting on IDs also request ID list from pack()
   // sort buf as needed
@@ -689,6 +692,7 @@ void Dump::sort()
         index[idsort[i]-idlo] = i;
   }
 
+#if defined(LMP_USE_LIBC_QSORT)
   if (!reorderflag) {
     dumpptr = this;
     for (i = 0; i < nme; i++) index[i] = i;
@@ -696,6 +700,14 @@ void Dump::sort()
     else if (sortorder == ASCEND) qsort(index,nme,sizeof(int),bufcompare);
     else qsort(index,nme,sizeof(int),bufcompare_reverse);
   }
+#else
+  if (!reorderflag) {
+    for (i = 0; i < nme; i++) index[i] = i;
+    if (sortcol == 0) merge_sort(index,nme,(void *)this,idcompare);
+    else if (sortorder == ASCEND) merge_sort(index,nme,(void *)this,bufcompare);
+    else merge_sort(index,nme,(void *)this,bufcompare_reverse);
+  }
+#endif
 
   // reset buf size and maxbuf to largest of any post-sort nme values
   // this insures proc 0 can receive everyone's info
@@ -716,6 +728,7 @@ void Dump::sort()
     memcpy(&buf[i*size_one],&bufsort[index[i]*size_one],nbytes);
 }
 
+#if defined(LMP_USE_LIBC_QSORT)
 /* ----------------------------------------------------------------------
    compare two atom IDs
    called via qsort() in sort() method
@@ -776,6 +789,64 @@ int Dump::bufcompare_reverse(const void *pi, const void *pj)
   return 0;
 }
 
+#else
+
+/* ----------------------------------------------------------------------
+   compare two atom IDs
+   called via merge_sort() in sort() method
+------------------------------------------------------------------------- */
+int Dump::idcompare(const int i, const int j, void *ptr)
+{
+  tagint *idsort = ((Dump *)ptr)->idsort;
+  if (idsort[i] < idsort[j]) return -1;
+  else if (idsort[i] > idsort[j]) return 1;
+  else return 0;
+}
+
+/* ----------------------------------------------------------------------
+   compare two buffer values with size_one stride
+   called via merge_sort() in sort() method
+   sort in ASCENDing order
+------------------------------------------------------------------------- */
+
+int Dump::bufcompare(const int i, const int j, void *ptr)
+{
+  Dump *dptr = (Dump *) ptr;
+  double *bufsort     = dptr->bufsort;
+  const int size_one  = dptr->size_one;
+  const int sortcolm1 = dptr->sortcolm1;
+
+  const int ii=i*size_one + sortcolm1;
+  const int jj=j*size_one + sortcolm1;
+
+  if (bufsort[ii] < bufsort[jj]) return -1;
+  else if (bufsort[ii] > bufsort[jj]) return 1;
+  else return 0;
+}
+
+/* ----------------------------------------------------------------------
+   compare two buffer values with size_one stride
+   called via merge_sort() in sort() method
+   sort in DESCENDing order
+------------------------------------------------------------------------- */
+
+int Dump::bufcompare_reverse(const int i, const int j, void *ptr)
+{
+  Dump *dptr = (Dump *) ptr;
+  double *bufsort     = dptr->bufsort;
+  const int size_one  = dptr->size_one;
+  const int sortcolm1 = dptr->sortcolm1;
+
+  const int ii=i*size_one + sortcolm1;
+  const int jj=j*size_one + sortcolm1;
+
+  if (bufsort[ii] < bufsort[jj]) return 1;
+  else if (bufsort[ii] > bufsort[jj]) return -1;
+  else return 0;
+}
+
+#endif
+
 /* ----------------------------------------------------------------------
    process params common to all dumps here
    if unknown param, call modify_param specific to the dump
diff --git a/src/dump.h b/src/dump.h
index c4d93352013d50cb8f27878d71bfb3855e5969b8..3c1450854ecb4eb0df1cd6216c9b84931b91bb0f 100644
--- a/src/dump.h
+++ b/src/dump.h
@@ -33,9 +33,10 @@ class Dump : protected Pointers {
   int comm_forward;          // size of forward communication (0 if none)
   int comm_reverse;          // size of reverse communication (0 if none)
 
+#if defined(LMP_USE_LIBC_QSORT)
   // static variable across all Dump objects
-
   static Dump *dumpptr;         // holds a ptr to Dump currently being used
+#endif
 
   Dump(class LAMMPS *, int, char **);
   virtual ~Dump();
@@ -132,11 +133,17 @@ class Dump : protected Pointers {
   virtual int convert_string(int, double *) {return 0;}
   virtual void write_data(int, double *) = 0;
   void pbc_allocate();
-    
+
   void sort();
+#if defined(LMP_USE_LIBC_QSORT)
   static int idcompare(const void *, const void *);
   static int bufcompare(const void *, const void *);
   static int bufcompare_reverse(const void *, const void *);
+#else
+  static int idcompare(const int, const int, void *);
+  static int bufcompare(const int, const int, void *);
+  static int bufcompare_reverse(const int, const int, void *);
+#endif
 };
 
 }
diff --git a/src/irregular.cpp b/src/irregular.cpp
index d0210244fbd830b1f4d493c23af37907a8b3f4f4..74fdb6f772420776f28d1d57b33af5e23809b354 100644
--- a/src/irregular.cpp
+++ b/src/irregular.cpp
@@ -23,11 +23,20 @@
 
 using namespace LAMMPS_NS;
 
+#if defined(LMP_USE_LIBC_QSORT)
 // allocate space for static class variable
 // prototype for non-class function
 
 int *Irregular::proc_recv_copy;
-int compare_standalone(const void *, const void *);
+static int compare_standalone(const void *, const void *);
+
+#else
+
+#include "mergesort.h"
+
+// prototype for non-class function
+static int compare_standalone(const int, const int, void *);
+#endif
 
 enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};    // several files
 
@@ -423,8 +432,13 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
     int *length_recv_ordered = new int[nrecv_proc];
 
     for (i = 0; i < nrecv_proc; i++) order[i] = i;
+
+#if defined(LMP_USE_LIBC_QSORT)
     proc_recv_copy = proc_recv;
     qsort(order,nrecv_proc,sizeof(int),compare_standalone);
+#else
+    merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
+#endif
 
     int j;
     for (i = 0; i < nrecv_proc; i++) {
@@ -450,6 +464,8 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
   return nrecvsize;
 }
 
+#if defined(LMP_USE_LIBC_QSORT)
+
 /* ----------------------------------------------------------------------
    comparison function invoked by qsort()
    accesses static class member proc_recv_copy, set before call to qsort()
@@ -465,6 +481,23 @@ int compare_standalone(const void *iptr, const void *jptr)
   return 0;
 }
 
+#else
+
+/* ----------------------------------------------------------------------
+   comparison function invoked by merge_sort()
+   void pointer contains proc_recv list;
+------------------------------------------------------------------------- */
+
+int compare_standalone(const int i, const int j, void *ptr)
+{
+  int *proc_recv = (int *) ptr;
+  if (proc_recv[i] < proc_recv[j]) return -1;
+  if (proc_recv[i] > proc_recv[j]) return 1;
+  return 0;
+}
+
+#endif
+
 /* ----------------------------------------------------------------------
    communicate atoms via PlanAtom
    sendbuf = list of atoms to send
@@ -671,9 +704,13 @@ int Irregular::create_data(int n, int *proclist, int sortflag)
     int *num_recv_ordered = new int[nrecv_proc];
 
     for (i = 0; i < nrecv_proc; i++) order[i] = i;
+
+#if defined(LMP_USE_LIBC_QSORT)
     proc_recv_copy = proc_recv;
     qsort(order,nrecv_proc,sizeof(int),compare_standalone);
-
+#else
+    merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
+#endif
     int j;
     for (i = 0; i < nrecv_proc; i++) {
       j = order[i];
diff --git a/src/irregular.h b/src/irregular.h
index ea0fee2eb83c4c3d0002633a362292d36a92757e..5b2a7718472dc6d35d8af16eaaf360cae77d6f33 100644
--- a/src/irregular.h
+++ b/src/irregular.h
@@ -21,9 +21,11 @@ namespace LAMMPS_NS {
 class Irregular : protected Pointers {
  public:
 
+#if defined(LMP_USE_LIBC_QSORT)
   // static variable across all Irregular objects, for qsort callback
 
   static int *proc_recv_copy;
+#endif
 
   Irregular(class LAMMPS *);
   ~Irregular();
diff --git a/src/mergesort.h b/src/mergesort.h
new file mode 100644
index 0000000000000000000000000000000000000000..1df6cb4b819375e68fbdd2c745a4c7cee4a49009
--- /dev/null
+++ b/src/mergesort.h
@@ -0,0 +1,120 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef LMP_MERGESORT
+#define LMP_MERGESORT
+
+#include <string.h>
+
+// custom hybrid upward merge sort implementation with support to pass
+// an opaque pointer to the comparison function, e.g. for access to
+// class members. this avoids having to use global variables.
+// for improved performance, we employ an in-place insertion sort on
+// chunks of up to 64 elements and switch to merge sort from then on.
+
+// part 1. insertion sort for pre-sorting of small chunks
+
+static void insertion_sort(int *index, int num, void *ptr,
+                           int (*comp)(int, int, void*))
+{
+  if (num < 2) return;
+  for (int i=1; i < num; ++i) {
+    int tmp = index[i];
+    for (int j=i-1; j >= 0; --j) {
+      if ((*comp)(index[j],tmp,ptr) > 0) {
+        index[j+1] = index[j];
+      } else {
+        index[j+1] = tmp;
+        break;
+      }
+      if (j == 0) index[0] = tmp;
+    }
+  }
+}
+
+// part 2. merge two sublists
+
+static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi,
+                     void *ptr, int (*comp)(int, int, void *))
+{
+  int i = llo;
+  int l = llo;
+  int r = rlo;
+  while ((l < lhi) && (r < rhi)) {
+    if ((*comp)(buf[l],buf[r],ptr) < 0)
+      idx[i++] = buf[l++];
+    else idx[i++] = buf[r++];
+  }
+
+  while (l < lhi) idx[i++] = buf[l++];
+  while (r < rhi) idx[i++] = buf[r++];
+}
+
+// part 3: loop over sublists doubling in size with each iteration.
+//         pre-sort sublists with insertion sort for better performance.
+
+static void merge_sort(int *index, int num, void *ptr,
+                       int (*comp)(int, int, void *))
+{
+  if (num < 2) return;
+
+  int chunk,i,j;
+
+  // do insertion sort on chunks of up to 64 elements
+
+  chunk = 64;
+  for (i=0; i < num; i += chunk) {
+    j = (i+chunk > num) ? num-i : chunk;
+    insertion_sort(index+i,j,ptr,comp);
+  }
+
+  // already done?
+
+  if (chunk >= num) return;
+
+  // continue with merge sort on the pre-sorted chunks.
+  // we need an extra buffer for temporary storage and two
+  // pointers to operate on, so we can swap the pointers
+  // rather than copying to the hold buffer in each pass
+
+  int *buf = new int[num];
+  int *dest = index;
+  int *hold = buf;
+
+  while (chunk < num) {
+    int m;
+
+    // swap hold and destination buffer
+
+    int *tmp = dest; dest = hold; hold = tmp;
+
+    // merge from hold array to destiation array
+
+    for (i=0; i < num-1; i += 2*chunk) {
+      j = i + 2*chunk;
+      if (j > num) j=num;
+      m = i+chunk;
+      if (m > num) m=num;
+      do_merge(dest,hold,i,m,m,j,ptr,comp);
+    }
+    chunk *= 2;
+  }
+
+  // if the final sorted data is in buf, copy back to index
+
+  if (dest == buf) memcpy(index,buf,sizeof(int)*num);
+
+  delete[] buf;
+}
+
+#endif