src/stdlib/SDL_qsort.c
changeset 1330 450721ad5436
child 1331 1cbaeee565b1
equal deleted inserted replaced
1329:bc67bbf87818 1330:450721ad5436
       
     1 /* qsort.c
       
     2  * (c) 1998 Gareth McCaughan
       
     3  *
       
     4  * This is a drop-in replacement for the C library's |qsort()| routine.
       
     5  *
       
     6  * Features:
       
     7  *   - Median-of-three pivoting (and more)
       
     8  *   - Truncation and final polishing by a single insertion sort
       
     9  *   - Early truncation when no swaps needed in pivoting step
       
    10  *   - Explicit recursion, guaranteed not to overflow
       
    11  *   - A few little wrinkles stolen from the GNU |qsort()|.
       
    12  *   - separate code for non-aligned / aligned / word-size objects
       
    13  *
       
    14  * This code may be reproduced freely provided
       
    15  *   - this file is retained unaltered apart from minor
       
    16  *     changes for portability and efficiency
       
    17  *   - no changes are made to this comment
       
    18  *   - any changes that *are* made are clearly flagged
       
    19  *   - the _ID string below is altered by inserting, after
       
    20  *     the date, the string " altered" followed at your option
       
    21  *     by other material. (Exceptions: you may change the name
       
    22  *     of the exported routine without changing the ID string.
       
    23  *     You may change the values of the macros TRUNC_* and
       
    24  *     PIVOT_THRESHOLD without changing the ID string, provided
       
    25  *     they remain constants with TRUNC_nonaligned, TRUNC_aligned
       
    26  *     and TRUNC_words/WORD_BYTES between 8 and 24, and
       
    27  *     PIVOT_THRESHOLD between 32 and 200.)
       
    28  *
       
    29  * You may use it in anything you like; you may make money
       
    30  * out of it; you may distribute it in object form or as
       
    31  * part of an executable without including source code;
       
    32  * you don't have to credit me. (But it would be nice if
       
    33  * you did.)
       
    34  *
       
    35  * If you find problems with this code, or find ways of
       
    36  * making it significantly faster, please let me know!
       
    37  * My e-mail address, valid as of early 1998 and certainly
       
    38  * OK for at least the next 18 months, is
       
    39  *    gjm11@dpmms.cam.ac.uk
       
    40  * Thanks!
       
    41  *
       
    42  * Gareth McCaughan   Peterhouse   Cambridge   1998
       
    43  */
       
    44 
       
    45 /*
       
    46 #include <assert.h>
       
    47 #include <stdlib.h>
       
    48 #include <string.h>
       
    49 */
       
    50 #define assert(X)
       
    51 #include "SDL_stdlib.h"
       
    52 #include "SDL_string.h"
       
    53 
       
    54 #ifndef HAVE_QSORT
       
    55 
       
    56 static char _ID[]="<qsort.c gjm 1.12 1998-03-19>";
       
    57 
       
    58 /* How many bytes are there per word? (Must be a power of 2,
       
    59  * and must in fact equal sizeof(int).)
       
    60  */
       
    61 #define WORD_BYTES sizeof(int)
       
    62 
       
    63 /* How big does our stack need to be? Answer: one entry per
       
    64  * bit in a |size_t|.
       
    65  */
       
    66 #define STACK_SIZE (8*sizeof(size_t))
       
    67 
       
    68 /* Different situations have slightly different requirements,
       
    69  * and we make life epsilon easier by using different truncation
       
    70  * points for the three different cases.
       
    71  * So far, I have tuned TRUNC_words and guessed that the same
       
    72  * value might work well for the other two cases. Of course
       
    73  * what works well on my machine might work badly on yours.
       
    74  */
       
    75 #define TRUNC_nonaligned	12
       
    76 #define TRUNC_aligned		12
       
    77 #define TRUNC_words		12*WORD_BYTES	/* nb different meaning */
       
    78 
       
    79 /* We use a simple pivoting algorithm for shortish sub-arrays
       
    80  * and a more complicated one for larger ones. The threshold
       
    81  * is PIVOT_THRESHOLD.
       
    82  */
       
    83 #define PIVOT_THRESHOLD 40
       
    84 
       
    85 typedef struct { char * first; char * last; } stack_entry;
       
    86 #define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
       
    87 #define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
       
    88 #define doLeft {first=ffirst;llast=last;continue;}
       
    89 #define doRight {ffirst=first;last=llast;continue;}
       
    90 #define pop {if (--stacktop<0) break;\
       
    91   first=ffirst=stack[stacktop].first;\
       
    92   last=llast=stack[stacktop].last;\
       
    93   continue;}
       
    94 
       
    95 /* Some comments on the implementation.
       
    96  * 1. When we finish partitioning the array into "low"
       
    97  *    and "high", we forget entirely about short subarrays,
       
    98  *    because they'll be done later by insertion sort.
       
    99  *    Doing lots of little insertion sorts might be a win
       
   100  *    on large datasets for locality-of-reference reasons,
       
   101  *    but it makes the code much nastier and increases
       
   102  *    bookkeeping overhead.
       
   103  * 2. We always save the shorter and get to work on the
       
   104  *    longer. This guarantees that every time we push
       
   105  *    an item onto the stack its size is <= 1/2 of that
       
   106  *    of its parent; so the stack can't need more than
       
   107  *    log_2(max-array-size) entries.
       
   108  * 3. We choose a pivot by looking at the first, last
       
   109  *    and middle elements. We arrange them into order
       
   110  *    because it's easy to do that in conjunction with
       
   111  *    choosing the pivot, and it makes things a little
       
   112  *    easier in the partitioning step. Anyway, the pivot
       
   113  *    is the middle of these three. It's still possible
       
   114  *    to construct datasets where the algorithm takes
       
   115  *    time of order n^2, but it simply never happens in
       
   116  *    practice.
       
   117  * 3' Newsflash: On further investigation I find that
       
   118  *    it's easy to construct datasets where median-of-3
       
   119  *    simply isn't good enough. So on large-ish subarrays
       
   120  *    we do a more sophisticated pivoting: we take three
       
   121  *    sets of 3 elements, find their medians, and then
       
   122  *    take the median of those.
       
   123  * 4. We copy the pivot element to a separate place
       
   124  *    because that way we can always do our comparisons
       
   125  *    directly against a pointer to that separate place,
       
   126  *    and don't have to wonder "did we move the pivot
       
   127  *    element?". This makes the inner loop better.
       
   128  * 5. It's possible to make the pivoting even more
       
   129  *    reliable by looking at more candidates when n
       
   130  *    is larger. (Taking this to its logical conclusion
       
   131  *    results in a variant of quicksort that doesn't
       
   132  *    have that n^2 worst case.) However, the overhead
       
   133  *    from the extra bookkeeping means that it's just
       
   134  *    not worth while.
       
   135  * 6. This is pretty clean and portable code. Here are
       
   136  *    all the potential portability pitfalls and problems
       
   137  *    I know of:
       
   138  *      - In one place (the insertion sort) I construct
       
   139  *        a pointer that points just past the end of the
       
   140  *        supplied array, and assume that (a) it won't
       
   141  *        compare equal to any pointer within the array,
       
   142  *        and (b) it will compare equal to a pointer
       
   143  *        obtained by stepping off the end of the array.
       
   144  *        These might fail on some segmented architectures.
       
   145  *      - I assume that there are 8 bits in a |char| when
       
   146  *        computing the size of stack needed. This would
       
   147  *        fail on machines with 9-bit or 16-bit bytes.
       
   148  *      - I assume that if |((int)base&(sizeof(int)-1))==0|
       
   149  *        and |(size&(sizeof(int)-1))==0| then it's safe to
       
   150  *        get at array elements via |int*|s, and that if
       
   151  *        actually |size==sizeof(int)| as well then it's
       
   152  *        safe to treat the elements as |int|s. This might
       
   153  *        fail on systems that convert pointers to integers
       
   154  *        in non-standard ways.
       
   155  *      - I assume that |8*sizeof(size_t)<=INT_MAX|. This
       
   156  *        would be false on a machine with 8-bit |char|s,
       
   157  *        16-bit |int|s and 4096-bit |size_t|s. :-)
       
   158  */
       
   159 
       
   160 /* The recursion logic is the same in each case: */
       
   161 #define Recurse(Trunc)				\
       
   162       { size_t l=last-ffirst,r=llast-first;	\
       
   163         if (l<Trunc) {				\
       
   164           if (r>=Trunc) doRight			\
       
   165           else pop				\
       
   166         }					\
       
   167         else if (l<=r) { pushLeft; doRight }	\
       
   168         else if (r>=Trunc) { pushRight; doLeft }\
       
   169         else doLeft				\
       
   170       }
       
   171 
       
   172 /* and so is the pivoting logic: */
       
   173 #define Pivot(swapper,sz)			\
       
   174   if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
       
   175   else {	\
       
   176     if (compare(first,mid)<0) {			\
       
   177       if (compare(mid,last)>0) {		\
       
   178         swapper(mid,last);			\
       
   179         if (compare(first,mid)>0) swapper(first,mid);\
       
   180       }						\
       
   181     }						\
       
   182     else {					\
       
   183       if (compare(mid,last)>0) swapper(first,last)\
       
   184       else {					\
       
   185         swapper(first,mid);			\
       
   186         if (compare(mid,last)>0) swapper(mid,last);\
       
   187       }						\
       
   188     }						\
       
   189     first+=sz; last-=sz;			\
       
   190   }
       
   191 
       
   192 #ifdef DEBUG_QSORT
       
   193 #include <stdio.h>
       
   194 #endif
       
   195 
       
   196 /* and so is the partitioning logic: */
       
   197 #define Partition(swapper,sz) {			\
       
   198   int swapped=0;				\
       
   199   do {						\
       
   200     while (compare(first,pivot)<0) first+=sz;	\
       
   201     while (compare(pivot,last)<0) last-=sz;	\
       
   202     if (first<last) {				\
       
   203       swapper(first,last); swapped=1;		\
       
   204       first+=sz; last-=sz; }			\
       
   205     else if (first==last) { first+=sz; last-=sz; break; }\
       
   206   } while (first<=last);			\
       
   207   if (!swapped) pop				\
       
   208 }
       
   209 
       
   210 /* and so is the pre-insertion-sort operation of putting
       
   211  * the smallest element into place as a sentinel.
       
   212  * Doing this makes the inner loop nicer. I got this
       
   213  * idea from the GNU implementation of qsort().
       
   214  */
       
   215 #define PreInsertion(swapper,limit,sz)		\
       
   216   first=base;					\
       
   217   last=first + (nmemb>limit ? limit : nmemb-1)*sz;\
       
   218   while (last!=base) {				\
       
   219     if (compare(first,last)>0) first=last;	\
       
   220     last-=sz; }					\
       
   221   if (first!=base) swapper(first,(char*)base);
       
   222 
       
   223 /* and so is the insertion sort, in the first two cases: */
       
   224 #define Insertion(swapper)			\
       
   225   last=((char*)base)+nmemb*size;		\
       
   226   for (first=((char*)base)+size;first!=last;first+=size) {	\
       
   227     char *test;					\
       
   228     /* Find the right place for |first|.	\
       
   229      * My apologies for var reuse. */		\
       
   230     for (test=first-size;compare(test,first)>0;test-=size) ;	\
       
   231     test+=size;					\
       
   232     if (test!=first) {				\
       
   233       /* Shift everything in [test,first)	\
       
   234        * up by one, and place |first|		\
       
   235        * where |test| is. */			\
       
   236       memcpy(pivot,first,size);			\
       
   237       memmove(test+size,test,first-test);	\
       
   238       memcpy(test,pivot,size);			\
       
   239     }						\
       
   240   }
       
   241 
       
   242 #define SWAP_nonaligned(a,b) { \
       
   243   register char *aa=(a),*bb=(b); \
       
   244   register size_t sz=size; \
       
   245   do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
       
   246 
       
   247 #define SWAP_aligned(a,b) { \
       
   248   register int *aa=(int*)(a),*bb=(int*)(b); \
       
   249   register size_t sz=size; \
       
   250   do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
       
   251 
       
   252 #define SWAP_words(a,b) { \
       
   253   register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
       
   254 
       
   255 /* ---------------------------------------------------------------------- */
       
   256 
       
   257 static char * pivot_big(char *first, char *mid, char *last, size_t size,
       
   258                         int compare(const void *, const void *)) {
       
   259   int d=(((last-first)/size)>>3)*size;
       
   260   char *m1,*m2,*m3;
       
   261   { char *a=first, *b=first+d, *c=first+2*d;
       
   262 #ifdef DEBUG_QSORT
       
   263 fprintf(stderr,"< %d %d %d\n",*(int*)a,*(int*)b,*(int*)c);
       
   264 #endif
       
   265     m1 = compare(a,b)<0 ?
       
   266            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
       
   267          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
       
   268   }
       
   269   { char *a=mid-d, *b=mid, *c=mid+d;
       
   270 #ifdef DEBUG_QSORT
       
   271 fprintf(stderr,". %d %d %d\n",*(int*)a,*(int*)b,*(int*)c);
       
   272 #endif
       
   273     m2 = compare(a,b)<0 ?
       
   274            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
       
   275          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
       
   276   }
       
   277   { char *a=last-2*d, *b=last-d, *c=last;
       
   278 #ifdef DEBUG_QSORT
       
   279 fprintf(stderr,"> %d %d %d\n",*(int*)a,*(int*)b,*(int*)c);
       
   280 #endif
       
   281     m3 = compare(a,b)<0 ?
       
   282            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
       
   283          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
       
   284   }
       
   285 #ifdef DEBUG_QSORT
       
   286 fprintf(stderr,"-> %d %d %d\n",*(int*)m1,*(int*)m2,*(int*)m3);
       
   287 #endif
       
   288   return compare(m1,m2)<0 ?
       
   289            (compare(m2,m3)<0 ? m2 : (compare(m1,m3)<0 ? m3 : m1))
       
   290          : (compare(m1,m3)<0 ? m1 : (compare(m2,m3)<0 ? m3 : m2));
       
   291 }
       
   292 
       
   293 /* ---------------------------------------------------------------------- */
       
   294 
       
   295 static void qsort_nonaligned(void *base, size_t nmemb, size_t size,
       
   296            int (*compare)(const void *, const void *)) {
       
   297 
       
   298   stack_entry stack[STACK_SIZE];
       
   299   int stacktop=0;
       
   300   char *first,*last;
       
   301   char *pivot=malloc(size);
       
   302   size_t trunc=TRUNC_nonaligned*size;
       
   303   assert(pivot!=0);
       
   304 
       
   305   first=(char*)base; last=first+(nmemb-1)*size;
       
   306 
       
   307   if ((size_t)(last-first)>trunc) {
       
   308     char *ffirst=first, *llast=last;
       
   309     while (1) {
       
   310       /* Select pivot */
       
   311       { char * mid=first+size*((last-first)/size >> 1);
       
   312         Pivot(SWAP_nonaligned,size);
       
   313         memcpy(pivot,mid,size);
       
   314       }
       
   315       /* Partition. */
       
   316       Partition(SWAP_nonaligned,size);
       
   317       /* Prepare to recurse/iterate. */
       
   318       Recurse(trunc)
       
   319     }
       
   320   }
       
   321   PreInsertion(SWAP_nonaligned,TRUNC_nonaligned,size);
       
   322   Insertion(SWAP_nonaligned);
       
   323   free(pivot);
       
   324 }
       
   325 
       
   326 static void qsort_aligned(void *base, size_t nmemb, size_t size,
       
   327            int (*compare)(const void *, const void *)) {
       
   328 
       
   329   stack_entry stack[STACK_SIZE];
       
   330   int stacktop=0;
       
   331   char *first,*last;
       
   332   char *pivot=malloc(size);
       
   333   size_t trunc=TRUNC_aligned*size;
       
   334   assert(pivot!=0);
       
   335 
       
   336   first=(char*)base; last=first+(nmemb-1)*size;
       
   337 
       
   338   if ((size_t)(last-first)>trunc) {
       
   339     char *ffirst=first,*llast=last;
       
   340     while (1) {
       
   341       /* Select pivot */
       
   342       { char * mid=first+size*((last-first)/size >> 1);
       
   343         Pivot(SWAP_aligned,size);
       
   344         memcpy(pivot,mid,size);
       
   345       }
       
   346       /* Partition. */
       
   347       Partition(SWAP_aligned,size);
       
   348       /* Prepare to recurse/iterate. */
       
   349       Recurse(trunc)
       
   350     }
       
   351   }
       
   352   PreInsertion(SWAP_aligned,TRUNC_aligned,size);
       
   353   Insertion(SWAP_aligned);
       
   354   free(pivot);
       
   355 }
       
   356 
       
   357 static void qsort_words(void *base, size_t nmemb,
       
   358            int (*compare)(const void *, const void *)) {
       
   359 
       
   360   stack_entry stack[STACK_SIZE];
       
   361   int stacktop=0;
       
   362   char *first,*last;
       
   363   char *pivot=malloc(WORD_BYTES);
       
   364   assert(pivot!=0);
       
   365 
       
   366   first=(char*)base; last=first+(nmemb-1)*WORD_BYTES;
       
   367 
       
   368   if (last-first>TRUNC_words) {
       
   369     char *ffirst=first, *llast=last;
       
   370     while (1) {
       
   371 #ifdef DEBUG_QSORT
       
   372 fprintf(stderr,"Doing %d:%d: ",
       
   373         (first-(char*)base)/WORD_BYTES,
       
   374         (last-(char*)base)/WORD_BYTES);
       
   375 #endif
       
   376       /* Select pivot */
       
   377       { char * mid=first+WORD_BYTES*((last-first) / (2*WORD_BYTES));
       
   378         Pivot(SWAP_words,WORD_BYTES);
       
   379         *(int*)pivot=*(int*)mid;
       
   380       }
       
   381 #ifdef DEBUG_QSORT
       
   382 fprintf(stderr,"pivot=%d\n",*(int*)pivot);
       
   383 #endif
       
   384       /* Partition. */
       
   385       Partition(SWAP_words,WORD_BYTES);
       
   386       /* Prepare to recurse/iterate. */
       
   387       Recurse(TRUNC_words)
       
   388     }
       
   389   }
       
   390   PreInsertion(SWAP_words,(TRUNC_words/WORD_BYTES),WORD_BYTES);
       
   391   /* Now do insertion sort. */
       
   392   last=((char*)base)+nmemb*WORD_BYTES;
       
   393   for (first=((char*)base)+WORD_BYTES;first!=last;first+=WORD_BYTES) {
       
   394     /* Find the right place for |first|. My apologies for var reuse */
       
   395     int *pl=(int*)(first-WORD_BYTES),*pr=(int*)first;
       
   396     *(int*)pivot=*(int*)first;
       
   397     for (;compare(pl,pivot)>0;pr=pl,--pl) {
       
   398       *pr=*pl; }
       
   399     if (pr!=(int*)first) *pr=*(int*)pivot;
       
   400   }
       
   401   free(pivot);
       
   402 }
       
   403 
       
   404 /* ---------------------------------------------------------------------- */
       
   405 
       
   406 void SDL_qsort(void *base, size_t nmemb, size_t size,
       
   407            int (*compare)(const void *, const void *)) {
       
   408 
       
   409   if (nmemb<=1) return;
       
   410   if (((int)base|size)&(WORD_BYTES-1))
       
   411     qsort_nonaligned(base,nmemb,size,compare);
       
   412   else if (size!=WORD_BYTES)
       
   413     qsort_aligned(base,nmemb,size,compare);
       
   414   else
       
   415     qsort_words(base,nmemb,compare);
       
   416 }
       
   417 
       
   418 #endif /* !HAVE_QSORT */