src/stdlib/SDL_qsort.c
author Ryan C. Gordon
Fri, 15 Mar 2013 01:01:20 -0400
changeset 7003 eeaf77005c30
parent 6281 e46d6f4b469e
child 7351 668a3dc28361
permissions -rw-r--r--
Improvements to stdlib.

All SDL_* functions are always available as real symbols, so you can always
link against them as a stable ABI. By default, however, all the things that
might have dithered down to macros in your application are now force-inlined,
to give you the same effect as before and theoretically better performance,
but still solve the classic macro problems.

Elsewhere, we provide real functions for these things that simply wrap the
inline functions, in case one needs to have a real function available.

Also: this exposed bugs: SDL_abs() does something different if you had the
macro vs the libc function, SDL_memcpy() returns a void* in the function
but not the macro, etc.
     1 /* qsort.c
     2  * (c) 1998 Gareth McCaughan
     3  *
     4  * This is a drop-in replacement for the C library's |qsort()| routine.
     5  *
     6  * Features:
     7  *   - Median-of-three pivoting (and more)
     8  *   - Truncation and final polishing by a single insertion sort
     9  *   - Early truncation when no swaps needed in pivoting step
    10  *   - Explicit recursion, guaranteed not to overflow
    11  *   - A few little wrinkles stolen from the GNU |qsort()|.
    12  *   - separate code for non-aligned / aligned / word-size objects
    13  *
    14  * This code may be reproduced freely provided
    15  *   - this file is retained unaltered apart from minor
    16  *     changes for portability and efficiency
    17  *   - no changes are made to this comment
    18  *   - any changes that *are* made are clearly flagged
    19  *   - the _ID string below is altered by inserting, after
    20  *     the date, the string " altered" followed at your option
    21  *     by other material. (Exceptions: you may change the name
    22  *     of the exported routine without changing the ID string.
    23  *     You may change the values of the macros TRUNC_* and
    24  *     PIVOT_THRESHOLD without changing the ID string, provided
    25  *     they remain constants with TRUNC_nonaligned, TRUNC_aligned
    26  *     and TRUNC_words/WORD_BYTES between 8 and 24, and
    27  *     PIVOT_THRESHOLD between 32 and 200.)
    28  *
    29  * You may use it in anything you like; you may make money
    30  * out of it; you may distribute it in object form or as
    31  * part of an executable without including source code;
    32  * you don't have to credit me. (But it would be nice if
    33  * you did.)
    34  *
    35  * If you find problems with this code, or find ways of
    36  * making it significantly faster, please let me know!
    37  * My e-mail address, valid as of early 1998 and certainly
    38  * OK for at least the next 18 months, is
    39  *    gjm11@dpmms.cam.ac.uk
    40  * Thanks!
    41  *
    42  * Gareth McCaughan   Peterhouse   Cambridge   1998
    43  */
    44 #include "SDL_config.h"
    45 
    46 /*
    47 #include <assert.h>
    48 #include <stdlib.h>
    49 #include <string.h>
    50 */
    51 #include "SDL_stdinc.h"
    52 #include "SDL_assert.h"
    53 
    54 #ifdef SDL_qsort
    55 #undef SDL_qsort
    56 void
    57 SDL_qsort(void *base, size_t nmemb, size_t size, int (*compare) (const void *, const void *))
    58 {
    59     SDL_qsort_inline(base, nmemb, size, compare);
    60 }
    61 #else
    62 
    63 #ifdef assert
    64 #undef assert
    65 #endif
    66 #define assert(X) SDL_assert(X)
    67 #ifdef malloc
    68 #undef malloc
    69 #endif
    70 #define malloc	SDL_malloc
    71 #ifdef free
    72 #undef free
    73 #endif
    74 #define free	SDL_free
    75 #ifdef memcpy
    76 #undef memcpy
    77 #endif
    78 #define memcpy	SDL_memcpy
    79 #ifdef memmove
    80 #undef memmove
    81 #endif
    82 #define memmove	SDL_memmove
    83 #ifdef qsort
    84 #undef qsort
    85 #endif
    86 #define qsort	SDL_qsort
    87 
    88 static const char _ID[] = "<qsort.c gjm 1.12 1998-03-19>";
    89 
    90 /* How many bytes are there per word? (Must be a power of 2,
    91  * and must in fact equal sizeof(int).)
    92  */
    93 #define WORD_BYTES sizeof(int)
    94 
    95 /* How big does our stack need to be? Answer: one entry per
    96  * bit in a |size_t|.
    97  */
    98 #define STACK_SIZE (8*sizeof(size_t))
    99 
   100 /* Different situations have slightly different requirements,
   101  * and we make life epsilon easier by using different truncation
   102  * points for the three different cases.
   103  * So far, I have tuned TRUNC_words and guessed that the same
   104  * value might work well for the other two cases. Of course
   105  * what works well on my machine might work badly on yours.
   106  */
   107 #define TRUNC_nonaligned	12
   108 #define TRUNC_aligned		12
   109 #define TRUNC_words		12*WORD_BYTES   /* nb different meaning */
   110 
   111 /* We use a simple pivoting algorithm for shortish sub-arrays
   112  * and a more complicated one for larger ones. The threshold
   113  * is PIVOT_THRESHOLD.
   114  */
   115 #define PIVOT_THRESHOLD 40
   116 
   117 typedef struct
   118 {
   119     char *first;
   120     char *last;
   121 } stack_entry;
   122 #define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
   123 #define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
   124 #define doLeft {first=ffirst;llast=last;continue;}
   125 #define doRight {ffirst=first;last=llast;continue;}
   126 #define pop {if (--stacktop<0) break;\
   127   first=ffirst=stack[stacktop].first;\
   128   last=llast=stack[stacktop].last;\
   129   continue;}
   130 
   131 /* Some comments on the implementation.
   132  * 1. When we finish partitioning the array into "low"
   133  *    and "high", we forget entirely about short subarrays,
   134  *    because they'll be done later by insertion sort.
   135  *    Doing lots of little insertion sorts might be a win
   136  *    on large datasets for locality-of-reference reasons,
   137  *    but it makes the code much nastier and increases
   138  *    bookkeeping overhead.
   139  * 2. We always save the shorter and get to work on the
   140  *    longer. This guarantees that every time we push
   141  *    an item onto the stack its size is <= 1/2 of that
   142  *    of its parent; so the stack can't need more than
   143  *    log_2(max-array-size) entries.
   144  * 3. We choose a pivot by looking at the first, last
   145  *    and middle elements. We arrange them into order
   146  *    because it's easy to do that in conjunction with
   147  *    choosing the pivot, and it makes things a little
   148  *    easier in the partitioning step. Anyway, the pivot
   149  *    is the middle of these three. It's still possible
   150  *    to construct datasets where the algorithm takes
   151  *    time of order n^2, but it simply never happens in
   152  *    practice.
   153  * 3' Newsflash: On further investigation I find that
   154  *    it's easy to construct datasets where median-of-3
   155  *    simply isn't good enough. So on large-ish subarrays
   156  *    we do a more sophisticated pivoting: we take three
   157  *    sets of 3 elements, find their medians, and then
   158  *    take the median of those.
   159  * 4. We copy the pivot element to a separate place
   160  *    because that way we can always do our comparisons
   161  *    directly against a pointer to that separate place,
   162  *    and don't have to wonder "did we move the pivot
   163  *    element?". This makes the inner loop better.
   164  * 5. It's possible to make the pivoting even more
   165  *    reliable by looking at more candidates when n
   166  *    is larger. (Taking this to its logical conclusion
   167  *    results in a variant of quicksort that doesn't
   168  *    have that n^2 worst case.) However, the overhead
   169  *    from the extra bookkeeping means that it's just
   170  *    not worth while.
   171  * 6. This is pretty clean and portable code. Here are
   172  *    all the potential portability pitfalls and problems
   173  *    I know of:
   174  *      - In one place (the insertion sort) I construct
   175  *        a pointer that points just past the end of the
   176  *        supplied array, and assume that (a) it won't
   177  *        compare equal to any pointer within the array,
   178  *        and (b) it will compare equal to a pointer
   179  *        obtained by stepping off the end of the array.
   180  *        These might fail on some segmented architectures.
   181  *      - I assume that there are 8 bits in a |char| when
   182  *        computing the size of stack needed. This would
   183  *        fail on machines with 9-bit or 16-bit bytes.
   184  *      - I assume that if |((int)base&(sizeof(int)-1))==0|
   185  *        and |(size&(sizeof(int)-1))==0| then it's safe to
   186  *        get at array elements via |int*|s, and that if
   187  *        actually |size==sizeof(int)| as well then it's
   188  *        safe to treat the elements as |int|s. This might
   189  *        fail on systems that convert pointers to integers
   190  *        in non-standard ways.
   191  *      - I assume that |8*sizeof(size_t)<=INT_MAX|. This
   192  *        would be false on a machine with 8-bit |char|s,
   193  *        16-bit |int|s and 4096-bit |size_t|s. :-)
   194  */
   195 
   196 /* The recursion logic is the same in each case: */
   197 #define Recurse(Trunc)				\
   198       { size_t l=last-ffirst,r=llast-first;	\
   199         if (l<Trunc) {				\
   200           if (r>=Trunc) doRight			\
   201           else pop				\
   202         }					\
   203         else if (l<=r) { pushLeft; doRight }	\
   204         else if (r>=Trunc) { pushRight; doLeft }\
   205         else doLeft				\
   206       }
   207 
   208 /* and so is the pivoting logic: */
   209 #define Pivot(swapper,sz)			\
   210   if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
   211   else {	\
   212     if (compare(first,mid)<0) {			\
   213       if (compare(mid,last)>0) {		\
   214         swapper(mid,last);			\
   215         if (compare(first,mid)>0) swapper(first,mid);\
   216       }						\
   217     }						\
   218     else {					\
   219       if (compare(mid,last)>0) swapper(first,last)\
   220       else {					\
   221         swapper(first,mid);			\
   222         if (compare(mid,last)>0) swapper(mid,last);\
   223       }						\
   224     }						\
   225     first+=sz; last-=sz;			\
   226   }
   227 
   228 #ifdef DEBUG_QSORT
   229 #include <stdio.h>
   230 #endif
   231 
   232 /* and so is the partitioning logic: */
   233 #define Partition(swapper,sz) {			\
   234   int swapped=0;				\
   235   do {						\
   236     while (compare(first,pivot)<0) first+=sz;	\
   237     while (compare(pivot,last)<0) last-=sz;	\
   238     if (first<last) {				\
   239       swapper(first,last); swapped=1;		\
   240       first+=sz; last-=sz; }			\
   241     else if (first==last) { first+=sz; last-=sz; break; }\
   242   } while (first<=last);			\
   243   if (!swapped) pop				\
   244 }
   245 
   246 /* and so is the pre-insertion-sort operation of putting
   247  * the smallest element into place as a sentinel.
   248  * Doing this makes the inner loop nicer. I got this
   249  * idea from the GNU implementation of qsort().
   250  */
   251 #define PreInsertion(swapper,limit,sz)		\
   252   first=base;					\
   253   last=first + (nmemb>limit ? limit : nmemb-1)*sz;\
   254   while (last!=base) {				\
   255     if (compare(first,last)>0) first=last;	\
   256     last-=sz; }					\
   257   if (first!=base) swapper(first,(char*)base);
   258 
   259 /* and so is the insertion sort, in the first two cases: */
   260 #define Insertion(swapper)			\
   261   last=((char*)base)+nmemb*size;		\
   262   for (first=((char*)base)+size;first!=last;first+=size) {	\
   263     char *test;					\
   264     /* Find the right place for |first|.	\
   265      * My apologies for var reuse. */		\
   266     for (test=first-size;compare(test,first)>0;test-=size) ;	\
   267     test+=size;					\
   268     if (test!=first) {				\
   269       /* Shift everything in [test,first)	\
   270        * up by one, and place |first|		\
   271        * where |test| is. */			\
   272       memcpy(pivot,first,size);			\
   273       memmove(test+size,test,first-test);	\
   274       memcpy(test,pivot,size);			\
   275     }						\
   276   }
   277 
   278 #define SWAP_nonaligned(a,b) { \
   279   register char *aa=(a),*bb=(b); \
   280   register size_t sz=size; \
   281   do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
   282 
   283 #define SWAP_aligned(a,b) { \
   284   register int *aa=(int*)(a),*bb=(int*)(b); \
   285   register size_t sz=size; \
   286   do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
   287 
   288 #define SWAP_words(a,b) { \
   289   register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
   290 
   291 /* ---------------------------------------------------------------------- */
   292 
   293 static char *
   294 pivot_big(char *first, char *mid, char *last, size_t size,
   295           int compare(const void *, const void *))
   296 {
   297     size_t d = (((last - first) / size) >> 3) * size;
   298     char *m1, *m2, *m3;
   299     {
   300         char *a = first, *b = first + d, *c = first + 2 * d;
   301 #ifdef DEBUG_QSORT
   302         fprintf(stderr, "< %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
   303 #endif
   304         m1 = compare(a, b) < 0 ?
   305             (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
   306             : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
   307     }
   308     {
   309         char *a = mid - d, *b = mid, *c = mid + d;
   310 #ifdef DEBUG_QSORT
   311         fprintf(stderr, ". %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
   312 #endif
   313         m2 = compare(a, b) < 0 ?
   314             (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
   315             : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
   316     }
   317     {
   318         char *a = last - 2 * d, *b = last - d, *c = last;
   319 #ifdef DEBUG_QSORT
   320         fprintf(stderr, "> %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
   321 #endif
   322         m3 = compare(a, b) < 0 ?
   323             (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
   324             : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
   325     }
   326 #ifdef DEBUG_QSORT
   327     fprintf(stderr, "-> %d %d %d\n", *(int *) m1, *(int *) m2, *(int *) m3);
   328 #endif
   329     return compare(m1, m2) < 0 ?
   330         (compare(m2, m3) < 0 ? m2 : (compare(m1, m3) < 0 ? m3 : m1))
   331         : (compare(m1, m3) < 0 ? m1 : (compare(m2, m3) < 0 ? m3 : m2));
   332 }
   333 
   334 /* ---------------------------------------------------------------------- */
   335 
   336 static void
   337 qsort_nonaligned(void *base, size_t nmemb, size_t size,
   338                  int (*compare) (const void *, const void *))
   339 {
   340 
   341     stack_entry stack[STACK_SIZE];
   342     int stacktop = 0;
   343     char *first, *last;
   344     char *pivot = malloc(size);
   345     size_t trunc = TRUNC_nonaligned * size;
   346     assert(pivot != 0);
   347 
   348     first = (char *) base;
   349     last = first + (nmemb - 1) * size;
   350 
   351     if ((size_t) (last - first) > trunc) {
   352         char *ffirst = first, *llast = last;
   353         while (1) {
   354             /* Select pivot */
   355             {
   356                 char *mid = first + size * ((last - first) / size >> 1);
   357                 Pivot(SWAP_nonaligned, size);
   358                 memcpy(pivot, mid, size);
   359             }
   360             /* Partition. */
   361             Partition(SWAP_nonaligned, size);
   362             /* Prepare to recurse/iterate. */
   363         Recurse(trunc)}
   364     }
   365     PreInsertion(SWAP_nonaligned, TRUNC_nonaligned, size);
   366     Insertion(SWAP_nonaligned);
   367     free(pivot);
   368 }
   369 
   370 static void
   371 qsort_aligned(void *base, size_t nmemb, size_t size,
   372               int (*compare) (const void *, const void *))
   373 {
   374 
   375     stack_entry stack[STACK_SIZE];
   376     int stacktop = 0;
   377     char *first, *last;
   378     char *pivot = malloc(size);
   379     size_t trunc = TRUNC_aligned * size;
   380     assert(pivot != 0);
   381 
   382     first = (char *) base;
   383     last = first + (nmemb - 1) * size;
   384 
   385     if ((size_t) (last - first) > trunc) {
   386         char *ffirst = first, *llast = last;
   387         while (1) {
   388             /* Select pivot */
   389             {
   390                 char *mid = first + size * ((last - first) / size >> 1);
   391                 Pivot(SWAP_aligned, size);
   392                 memcpy(pivot, mid, size);
   393             }
   394             /* Partition. */
   395             Partition(SWAP_aligned, size);
   396             /* Prepare to recurse/iterate. */
   397         Recurse(trunc)}
   398     }
   399     PreInsertion(SWAP_aligned, TRUNC_aligned, size);
   400     Insertion(SWAP_aligned);
   401     free(pivot);
   402 }
   403 
   404 static void
   405 qsort_words(void *base, size_t nmemb,
   406             int (*compare) (const void *, const void *))
   407 {
   408 
   409     stack_entry stack[STACK_SIZE];
   410     int stacktop = 0;
   411     char *first, *last;
   412     char *pivot = malloc(WORD_BYTES);
   413     assert(pivot != 0);
   414 
   415     first = (char *) base;
   416     last = first + (nmemb - 1) * WORD_BYTES;
   417 
   418     if (last - first > TRUNC_words) {
   419         char *ffirst = first, *llast = last;
   420         while (1) {
   421 #ifdef DEBUG_QSORT
   422             fprintf(stderr, "Doing %d:%d: ",
   423                     (first - (char *) base) / WORD_BYTES,
   424                     (last - (char *) base) / WORD_BYTES);
   425 #endif
   426             /* Select pivot */
   427             {
   428                 char *mid =
   429                     first + WORD_BYTES * ((last - first) / (2 * WORD_BYTES));
   430                 Pivot(SWAP_words, WORD_BYTES);
   431                 *(int *) pivot = *(int *) mid;
   432             }
   433 #ifdef DEBUG_QSORT
   434             fprintf(stderr, "pivot=%d\n", *(int *) pivot);
   435 #endif
   436             /* Partition. */
   437             Partition(SWAP_words, WORD_BYTES);
   438             /* Prepare to recurse/iterate. */
   439         Recurse(TRUNC_words)}
   440     }
   441     PreInsertion(SWAP_words, (TRUNC_words / WORD_BYTES), WORD_BYTES);
   442     /* Now do insertion sort. */
   443     last = ((char *) base) + nmemb * WORD_BYTES;
   444     for (first = ((char *) base) + WORD_BYTES; first != last;
   445          first += WORD_BYTES) {
   446         /* Find the right place for |first|. My apologies for var reuse */
   447         int *pl = (int *) (first - WORD_BYTES), *pr = (int *) first;
   448         *(int *) pivot = *(int *) first;
   449         for (; compare(pl, pivot) > 0; pr = pl, --pl) {
   450             *pr = *pl;
   451         }
   452         if (pr != (int *) first)
   453             *pr = *(int *) pivot;
   454     }
   455     free(pivot);
   456 }
   457 
   458 /* ---------------------------------------------------------------------- */
   459 
   460 void
   461 qsort(void *base, size_t nmemb, size_t size,
   462       int (*compare) (const void *, const void *))
   463 {
   464 
   465     if (nmemb <= 1)
   466         return;
   467     if (((uintptr_t) base | size) & (WORD_BYTES - 1))
   468         qsort_nonaligned(base, nmemb, size, compare);
   469     else if (size != WORD_BYTES)
   470         qsort_aligned(base, nmemb, size, compare);
   471     else
   472         qsort_words(base, nmemb, compare);
   473 }
   474 
   475 #endif /* !SDL_qsort */
   476 
   477 /* vi: set ts=4 sw=4 expandtab: */