src/stdlib/SDL_qsort.c
author Sam Lantinga <slouken@libsdl.org>
Mon, 06 Feb 2006 08:46:14 +0000
changeset 1331 1cbaeee565b1
parent 1330 450721ad5436
child 1336 3692456e7b0f
permissions -rw-r--r--
A few fixes to get this building on Linux again
     1 /* qsort.c
     2  * (c) 1998 Gareth McCaughan
     3  *
     4  * This is a drop-in replacement for the C library's |qsort()| routine.
     5  *
     6  * Features:
     7  *   - Median-of-three pivoting (and more)
     8  *   - Truncation and final polishing by a single insertion sort
     9  *   - Early truncation when no swaps needed in pivoting step
    10  *   - Explicit recursion, guaranteed not to overflow
    11  *   - A few little wrinkles stolen from the GNU |qsort()|.
    12  *   - separate code for non-aligned / aligned / word-size objects
    13  *
    14  * This code may be reproduced freely provided
    15  *   - this file is retained unaltered apart from minor
    16  *     changes for portability and efficiency
    17  *   - no changes are made to this comment
    18  *   - any changes that *are* made are clearly flagged
    19  *   - the _ID string below is altered by inserting, after
    20  *     the date, the string " altered" followed at your option
    21  *     by other material. (Exceptions: you may change the name
    22  *     of the exported routine without changing the ID string.
    23  *     You may change the values of the macros TRUNC_* and
    24  *     PIVOT_THRESHOLD without changing the ID string, provided
    25  *     they remain constants with TRUNC_nonaligned, TRUNC_aligned
    26  *     and TRUNC_words/WORD_BYTES between 8 and 24, and
    27  *     PIVOT_THRESHOLD between 32 and 200.)
    28  *
    29  * You may use it in anything you like; you may make money
    30  * out of it; you may distribute it in object form or as
    31  * part of an executable without including source code;
    32  * you don't have to credit me. (But it would be nice if
    33  * you did.)
    34  *
    35  * If you find problems with this code, or find ways of
    36  * making it significantly faster, please let me know!
    37  * My e-mail address, valid as of early 1998 and certainly
    38  * OK for at least the next 18 months, is
    39  *    gjm11@dpmms.cam.ac.uk
    40  * Thanks!
    41  *
    42  * Gareth McCaughan   Peterhouse   Cambridge   1998
    43  */
    44 
    45 /*
    46 #include <assert.h>
    47 #include <stdlib.h>
    48 #include <string.h>
    49 */
    50 #define assert(X)
    51 #include "SDL_stdlib.h"
    52 #include "SDL_string.h"
    53 
    54 #ifndef HAVE_QSORT
    55 
    56 static char _ID[]="<qsort.c gjm 1.12 1998-03-19>";
    57 
    58 /* How many bytes are there per word? (Must be a power of 2,
    59  * and must in fact equal sizeof(int).)
    60  */
    61 #define WORD_BYTES sizeof(int)
    62 
    63 /* How big does our stack need to be? Answer: one entry per
    64  * bit in a |size_t|.
    65  */
    66 #define STACK_SIZE (8*sizeof(size_t))
    67 
    68 /* Different situations have slightly different requirements,
    69  * and we make life epsilon easier by using different truncation
    70  * points for the three different cases.
    71  * So far, I have tuned TRUNC_words and guessed that the same
    72  * value might work well for the other two cases. Of course
    73  * what works well on my machine might work badly on yours.
    74  */
    75 #define TRUNC_nonaligned	12
    76 #define TRUNC_aligned		12
    77 #define TRUNC_words		12*WORD_BYTES	/* nb different meaning */
    78 
    79 /* We use a simple pivoting algorithm for shortish sub-arrays
    80  * and a more complicated one for larger ones. The threshold
    81  * is PIVOT_THRESHOLD.
    82  */
    83 #define PIVOT_THRESHOLD 40
    84 
    85 typedef struct { char * first; char * last; } stack_entry;
    86 #define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
    87 #define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
    88 #define doLeft {first=ffirst;llast=last;continue;}
    89 #define doRight {ffirst=first;last=llast;continue;}
    90 #define pop {if (--stacktop<0) break;\
    91   first=ffirst=stack[stacktop].first;\
    92   last=llast=stack[stacktop].last;\
    93   continue;}
    94 
    95 /* Some comments on the implementation.
    96  * 1. When we finish partitioning the array into "low"
    97  *    and "high", we forget entirely about short subarrays,
    98  *    because they'll be done later by insertion sort.
    99  *    Doing lots of little insertion sorts might be a win
   100  *    on large datasets for locality-of-reference reasons,
   101  *    but it makes the code much nastier and increases
   102  *    bookkeeping overhead.
   103  * 2. We always save the shorter and get to work on the
   104  *    longer. This guarantees that every time we push
   105  *    an item onto the stack its size is <= 1/2 of that
   106  *    of its parent; so the stack can't need more than
   107  *    log_2(max-array-size) entries.
   108  * 3. We choose a pivot by looking at the first, last
   109  *    and middle elements. We arrange them into order
   110  *    because it's easy to do that in conjunction with
   111  *    choosing the pivot, and it makes things a little
   112  *    easier in the partitioning step. Anyway, the pivot
   113  *    is the middle of these three. It's still possible
   114  *    to construct datasets where the algorithm takes
   115  *    time of order n^2, but it simply never happens in
   116  *    practice.
   117  * 3' Newsflash: On further investigation I find that
   118  *    it's easy to construct datasets where median-of-3
   119  *    simply isn't good enough. So on large-ish subarrays
   120  *    we do a more sophisticated pivoting: we take three
   121  *    sets of 3 elements, find their medians, and then
   122  *    take the median of those.
   123  * 4. We copy the pivot element to a separate place
   124  *    because that way we can always do our comparisons
   125  *    directly against a pointer to that separate place,
   126  *    and don't have to wonder "did we move the pivot
   127  *    element?". This makes the inner loop better.
   128  * 5. It's possible to make the pivoting even more
   129  *    reliable by looking at more candidates when n
   130  *    is larger. (Taking this to its logical conclusion
   131  *    results in a variant of quicksort that doesn't
   132  *    have that n^2 worst case.) However, the overhead
   133  *    from the extra bookkeeping means that it's just
   134  *    not worth while.
   135  * 6. This is pretty clean and portable code. Here are
   136  *    all the potential portability pitfalls and problems
   137  *    I know of:
   138  *      - In one place (the insertion sort) I construct
   139  *        a pointer that points just past the end of the
   140  *        supplied array, and assume that (a) it won't
   141  *        compare equal to any pointer within the array,
   142  *        and (b) it will compare equal to a pointer
   143  *        obtained by stepping off the end of the array.
   144  *        These might fail on some segmented architectures.
   145  *      - I assume that there are 8 bits in a |char| when
   146  *        computing the size of stack needed. This would
   147  *        fail on machines with 9-bit or 16-bit bytes.
   148  *      - I assume that if |((int)base&(sizeof(int)-1))==0|
   149  *        and |(size&(sizeof(int)-1))==0| then it's safe to
   150  *        get at array elements via |int*|s, and that if
   151  *        actually |size==sizeof(int)| as well then it's
   152  *        safe to treat the elements as |int|s. This might
   153  *        fail on systems that convert pointers to integers
   154  *        in non-standard ways.
   155  *      - I assume that |8*sizeof(size_t)<=INT_MAX|. This
   156  *        would be false on a machine with 8-bit |char|s,
   157  *        16-bit |int|s and 4096-bit |size_t|s. :-)
   158  */
   159 
   160 /* The recursion logic is the same in each case: */
   161 #define Recurse(Trunc)				\
   162       { size_t l=last-ffirst,r=llast-first;	\
   163         if (l<Trunc) {				\
   164           if (r>=Trunc) doRight			\
   165           else pop				\
   166         }					\
   167         else if (l<=r) { pushLeft; doRight }	\
   168         else if (r>=Trunc) { pushRight; doLeft }\
   169         else doLeft				\
   170       }
   171 
   172 /* and so is the pivoting logic: */
   173 #define Pivot(swapper,sz)			\
   174   if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
   175   else {	\
   176     if (compare(first,mid)<0) {			\
   177       if (compare(mid,last)>0) {		\
   178         swapper(mid,last);			\
   179         if (compare(first,mid)>0) swapper(first,mid);\
   180       }						\
   181     }						\
   182     else {					\
   183       if (compare(mid,last)>0) swapper(first,last)\
   184       else {					\
   185         swapper(first,mid);			\
   186         if (compare(mid,last)>0) swapper(mid,last);\
   187       }						\
   188     }						\
   189     first+=sz; last-=sz;			\
   190   }
   191 
   192 #ifdef DEBUG_QSORT
   193 #include <stdio.h>
   194 #endif
   195 
   196 /* and so is the partitioning logic: */
   197 #define Partition(swapper,sz) {			\
   198   int swapped=0;				\
   199   do {						\
   200     while (compare(first,pivot)<0) first+=sz;	\
   201     while (compare(pivot,last)<0) last-=sz;	\
   202     if (first<last) {				\
   203       swapper(first,last); swapped=1;		\
   204       first+=sz; last-=sz; }			\
   205     else if (first==last) { first+=sz; last-=sz; break; }\
   206   } while (first<=last);			\
   207   if (!swapped) pop				\
   208 }
   209 
   210 /* and so is the pre-insertion-sort operation of putting
   211  * the smallest element into place as a sentinel.
   212  * Doing this makes the inner loop nicer. I got this
   213  * idea from the GNU implementation of qsort().
   214  */
   215 #define PreInsertion(swapper,limit,sz)		\
   216   first=base;					\
   217   last=first + (nmemb>limit ? limit : nmemb-1)*sz;\
   218   while (last!=base) {				\
   219     if (compare(first,last)>0) first=last;	\
   220     last-=sz; }					\
   221   if (first!=base) swapper(first,(char*)base);
   222 
   223 /* and so is the insertion sort, in the first two cases: */
   224 #define Insertion(swapper)			\
   225   last=((char*)base)+nmemb*size;		\
   226   for (first=((char*)base)+size;first!=last;first+=size) {	\
   227     char *test;					\
   228     /* Find the right place for |first|.	\
   229      * My apologies for var reuse. */		\
   230     for (test=first-size;compare(test,first)>0;test-=size) ;	\
   231     test+=size;					\
   232     if (test!=first) {				\
   233       /* Shift everything in [test,first)	\
   234        * up by one, and place |first|		\
   235        * where |test| is. */			\
   236       memcpy(pivot,first,size);			\
   237       memmove(test+size,test,first-test);	\
   238       memcpy(test,pivot,size);			\
   239     }						\
   240   }
   241 
   242 #define SWAP_nonaligned(a,b) { \
   243   register char *aa=(a),*bb=(b); \
   244   register size_t sz=size; \
   245   do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
   246 
   247 #define SWAP_aligned(a,b) { \
   248   register int *aa=(int*)(a),*bb=(int*)(b); \
   249   register size_t sz=size; \
   250   do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
   251 
   252 #define SWAP_words(a,b) { \
   253   register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
   254 
   255 /* ---------------------------------------------------------------------- */
   256 
   257 static char * pivot_big(char *first, char *mid, char *last, size_t size,
   258                         int compare(const void *, const void *)) {
   259   int d=(((last-first)/size)>>3)*size;
   260   char *m1,*m2,*m3;
   261   { char *a=first, *b=first+d, *c=first+2*d;
   262 #ifdef DEBUG_QSORT
   263 fprintf(stderr,"< %d %d %d\n",*(int*)a,*(int*)b,*(int*)c);
   264 #endif
   265     m1 = compare(a,b)<0 ?
   266            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
   267          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
   268   }
   269   { char *a=mid-d, *b=mid, *c=mid+d;
   270 #ifdef DEBUG_QSORT
   271 fprintf(stderr,". %d %d %d\n",*(int*)a,*(int*)b,*(int*)c);
   272 #endif
   273     m2 = compare(a,b)<0 ?
   274            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
   275          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
   276   }
   277   { char *a=last-2*d, *b=last-d, *c=last;
   278 #ifdef DEBUG_QSORT
   279 fprintf(stderr,"> %d %d %d\n",*(int*)a,*(int*)b,*(int*)c);
   280 #endif
   281     m3 = compare(a,b)<0 ?
   282            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
   283          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
   284   }
   285 #ifdef DEBUG_QSORT
   286 fprintf(stderr,"-> %d %d %d\n",*(int*)m1,*(int*)m2,*(int*)m3);
   287 #endif
   288   return compare(m1,m2)<0 ?
   289            (compare(m2,m3)<0 ? m2 : (compare(m1,m3)<0 ? m3 : m1))
   290          : (compare(m1,m3)<0 ? m1 : (compare(m2,m3)<0 ? m3 : m2));
   291 }
   292 
   293 /* ---------------------------------------------------------------------- */
   294 
   295 static void qsort_nonaligned(void *base, size_t nmemb, size_t size,
   296            int (*compare)(const void *, const void *)) {
   297 
   298   stack_entry stack[STACK_SIZE];
   299   int stacktop=0;
   300   char *first,*last;
   301   char *pivot=malloc(size);
   302   size_t trunc=TRUNC_nonaligned*size;
   303   assert(pivot!=0);
   304 
   305   first=(char*)base; last=first+(nmemb-1)*size;
   306 
   307   if ((size_t)(last-first)>trunc) {
   308     char *ffirst=first, *llast=last;
   309     while (1) {
   310       /* Select pivot */
   311       { char * mid=first+size*((last-first)/size >> 1);
   312         Pivot(SWAP_nonaligned,size);
   313         memcpy(pivot,mid,size);
   314       }
   315       /* Partition. */
   316       Partition(SWAP_nonaligned,size);
   317       /* Prepare to recurse/iterate. */
   318       Recurse(trunc)
   319     }
   320   }
   321   PreInsertion(SWAP_nonaligned,TRUNC_nonaligned,size);
   322   Insertion(SWAP_nonaligned);
   323   free(pivot);
   324 }
   325 
   326 static void qsort_aligned(void *base, size_t nmemb, size_t size,
   327            int (*compare)(const void *, const void *)) {
   328 
   329   stack_entry stack[STACK_SIZE];
   330   int stacktop=0;
   331   char *first,*last;
   332   char *pivot=malloc(size);
   333   size_t trunc=TRUNC_aligned*size;
   334   assert(pivot!=0);
   335 
   336   first=(char*)base; last=first+(nmemb-1)*size;
   337 
   338   if ((size_t)(last-first)>trunc) {
   339     char *ffirst=first,*llast=last;
   340     while (1) {
   341       /* Select pivot */
   342       { char * mid=first+size*((last-first)/size >> 1);
   343         Pivot(SWAP_aligned,size);
   344         memcpy(pivot,mid,size);
   345       }
   346       /* Partition. */
   347       Partition(SWAP_aligned,size);
   348       /* Prepare to recurse/iterate. */
   349       Recurse(trunc)
   350     }
   351   }
   352   PreInsertion(SWAP_aligned,TRUNC_aligned,size);
   353   Insertion(SWAP_aligned);
   354   free(pivot);
   355 }
   356 
   357 static void qsort_words(void *base, size_t nmemb,
   358            int (*compare)(const void *, const void *)) {
   359 
   360   stack_entry stack[STACK_SIZE];
   361   int stacktop=0;
   362   char *first,*last;
   363   char *pivot=malloc(WORD_BYTES);
   364   assert(pivot!=0);
   365 
   366   first=(char*)base; last=first+(nmemb-1)*WORD_BYTES;
   367 
   368   if (last-first>TRUNC_words) {
   369     char *ffirst=first, *llast=last;
   370     while (1) {
   371 #ifdef DEBUG_QSORT
   372 fprintf(stderr,"Doing %d:%d: ",
   373         (first-(char*)base)/WORD_BYTES,
   374         (last-(char*)base)/WORD_BYTES);
   375 #endif
   376       /* Select pivot */
   377       { char * mid=first+WORD_BYTES*((last-first) / (2*WORD_BYTES));
   378         Pivot(SWAP_words,WORD_BYTES);
   379         *(int*)pivot=*(int*)mid;
   380       }
   381 #ifdef DEBUG_QSORT
   382 fprintf(stderr,"pivot=%d\n",*(int*)pivot);
   383 #endif
   384       /* Partition. */
   385       Partition(SWAP_words,WORD_BYTES);
   386       /* Prepare to recurse/iterate. */
   387       Recurse(TRUNC_words)
   388     }
   389   }
   390   PreInsertion(SWAP_words,(TRUNC_words/WORD_BYTES),WORD_BYTES);
   391   /* Now do insertion sort. */
   392   last=((char*)base)+nmemb*WORD_BYTES;
   393   for (first=((char*)base)+WORD_BYTES;first!=last;first+=WORD_BYTES) {
   394     /* Find the right place for |first|. My apologies for var reuse */
   395     int *pl=(int*)(first-WORD_BYTES),*pr=(int*)first;
   396     *(int*)pivot=*(int*)first;
   397     for (;compare(pl,pivot)>0;pr=pl,--pl) {
   398       *pr=*pl; }
   399     if (pr!=(int*)first) *pr=*(int*)pivot;
   400   }
   401   free(pivot);
   402 }
   403 
   404 /* ---------------------------------------------------------------------- */
   405 
   406 void SDL_qsort(void *base, size_t nmemb, size_t size,
   407            int (*compare)(const void *, const void *)) {
   408 
   409   if (nmemb<=1) return;
   410   if (((int)base|size)&(WORD_BYTES-1))
   411     qsort_nonaligned(base,nmemb,size,compare);
   412   else if (size!=WORD_BYTES)
   413     qsort_aligned(base,nmemb,size,compare);
   414   else
   415     qsort_words(base,nmemb,compare);
   416 }
   417 
   418 #endif /* !HAVE_QSORT */