src/stdlib/SDL_qsort.c
author Sam Lantinga
Fri, 05 Jul 2013 23:57:19 -0700
changeset 7351 668a3dc28361
parent 7003 eeaf77005c30
child 8093 b43765095a6f
permissions -rw-r--r--
Removed the inline functions from SDL_stdinc.h
Having the SDL functions inline is causing build issues, and in the case of malloc(), etc. causing malloc/free mismatches, if the application build environment differs from the SDL build environment.

In the interest of safety and consistency, the functions will always be in the SDL library and will only be redirected to the C library there, if they are available.

See the following threads on the SDL mailing list for the gruesome details:
* SDL_stdinc.h inlines problematic when application not compiled in exact same feature environment
* Error compiling program against SDL2 with -std=c++11 g++ flag
     1 /* qsort.c
     2  * (c) 1998 Gareth McCaughan
     3  *
     4  * This is a drop-in replacement for the C library's |qsort()| routine.
     5  *
     6  * Features:
     7  *   - Median-of-three pivoting (and more)
     8  *   - Truncation and final polishing by a single insertion sort
     9  *   - Early truncation when no swaps needed in pivoting step
    10  *   - Explicit recursion, guaranteed not to overflow
    11  *   - A few little wrinkles stolen from the GNU |qsort()|.
    12  *   - separate code for non-aligned / aligned / word-size objects
    13  *
    14  * This code may be reproduced freely provided
    15  *   - this file is retained unaltered apart from minor
    16  *     changes for portability and efficiency
    17  *   - no changes are made to this comment
    18  *   - any changes that *are* made are clearly flagged
    19  *   - the _ID string below is altered by inserting, after
    20  *     the date, the string " altered" followed at your option
    21  *     by other material. (Exceptions: you may change the name
    22  *     of the exported routine without changing the ID string.
    23  *     You may change the values of the macros TRUNC_* and
    24  *     PIVOT_THRESHOLD without changing the ID string, provided
    25  *     they remain constants with TRUNC_nonaligned, TRUNC_aligned
    26  *     and TRUNC_words/WORD_BYTES between 8 and 24, and
    27  *     PIVOT_THRESHOLD between 32 and 200.)
    28  *
    29  * You may use it in anything you like; you may make money
    30  * out of it; you may distribute it in object form or as
    31  * part of an executable without including source code;
    32  * you don't have to credit me. (But it would be nice if
    33  * you did.)
    34  *
    35  * If you find problems with this code, or find ways of
    36  * making it significantly faster, please let me know!
    37  * My e-mail address, valid as of early 1998 and certainly
    38  * OK for at least the next 18 months, is
    39  *    gjm11@dpmms.cam.ac.uk
    40  * Thanks!
    41  *
    42  * Gareth McCaughan   Peterhouse   Cambridge   1998
    43  */
    44 #include "SDL_config.h"
    45 
    46 /*
    47 #include <assert.h>
    48 #include <stdlib.h>
    49 #include <string.h>
    50 */
    51 #include "SDL_stdinc.h"
    52 #include "SDL_assert.h"
    53 
    54 #if defined(HAVE_QSORT)
    55 void
    56 SDL_qsort(void *base, size_t nmemb, size_t size, int (*compare) (const void *, const void *))
    57 {
    58     qsort(base, nmemb, size, compare);
    59 }
    60 #else
    61 
    62 #ifdef assert
    63 #undef assert
    64 #endif
    65 #define assert(X) SDL_assert(X)
    66 #ifdef malloc
    67 #undef malloc
    68 #endif
    69 #define malloc	SDL_malloc
    70 #ifdef free
    71 #undef free
    72 #endif
    73 #define free	SDL_free
    74 #ifdef memcpy
    75 #undef memcpy
    76 #endif
    77 #define memcpy	SDL_memcpy
    78 #ifdef memmove
    79 #undef memmove
    80 #endif
    81 #define memmove	SDL_memmove
    82 #ifdef qsort
    83 #undef qsort
    84 #endif
    85 #define qsort	SDL_qsort
    86 
    87 static const char _ID[] = "<qsort.c gjm 1.12 1998-03-19>";
    88 
    89 /* How many bytes are there per word? (Must be a power of 2,
    90  * and must in fact equal sizeof(int).)
    91  */
    92 #define WORD_BYTES sizeof(int)
    93 
    94 /* How big does our stack need to be? Answer: one entry per
    95  * bit in a |size_t|.
    96  */
    97 #define STACK_SIZE (8*sizeof(size_t))
    98 
    99 /* Different situations have slightly different requirements,
   100  * and we make life epsilon easier by using different truncation
   101  * points for the three different cases.
   102  * So far, I have tuned TRUNC_words and guessed that the same
   103  * value might work well for the other two cases. Of course
   104  * what works well on my machine might work badly on yours.
   105  */
   106 #define TRUNC_nonaligned	12
   107 #define TRUNC_aligned		12
   108 #define TRUNC_words		12*WORD_BYTES   /* nb different meaning */
   109 
   110 /* We use a simple pivoting algorithm for shortish sub-arrays
   111  * and a more complicated one for larger ones. The threshold
   112  * is PIVOT_THRESHOLD.
   113  */
   114 #define PIVOT_THRESHOLD 40
   115 
   116 typedef struct
   117 {
   118     char *first;
   119     char *last;
   120 } stack_entry;
   121 #define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
   122 #define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
   123 #define doLeft {first=ffirst;llast=last;continue;}
   124 #define doRight {ffirst=first;last=llast;continue;}
   125 #define pop {if (--stacktop<0) break;\
   126   first=ffirst=stack[stacktop].first;\
   127   last=llast=stack[stacktop].last;\
   128   continue;}
   129 
   130 /* Some comments on the implementation.
   131  * 1. When we finish partitioning the array into "low"
   132  *    and "high", we forget entirely about short subarrays,
   133  *    because they'll be done later by insertion sort.
   134  *    Doing lots of little insertion sorts might be a win
   135  *    on large datasets for locality-of-reference reasons,
   136  *    but it makes the code much nastier and increases
   137  *    bookkeeping overhead.
   138  * 2. We always save the shorter and get to work on the
   139  *    longer. This guarantees that every time we push
   140  *    an item onto the stack its size is <= 1/2 of that
   141  *    of its parent; so the stack can't need more than
   142  *    log_2(max-array-size) entries.
   143  * 3. We choose a pivot by looking at the first, last
   144  *    and middle elements. We arrange them into order
   145  *    because it's easy to do that in conjunction with
   146  *    choosing the pivot, and it makes things a little
   147  *    easier in the partitioning step. Anyway, the pivot
   148  *    is the middle of these three. It's still possible
   149  *    to construct datasets where the algorithm takes
   150  *    time of order n^2, but it simply never happens in
   151  *    practice.
   152  * 3' Newsflash: On further investigation I find that
   153  *    it's easy to construct datasets where median-of-3
   154  *    simply isn't good enough. So on large-ish subarrays
   155  *    we do a more sophisticated pivoting: we take three
   156  *    sets of 3 elements, find their medians, and then
   157  *    take the median of those.
   158  * 4. We copy the pivot element to a separate place
   159  *    because that way we can always do our comparisons
   160  *    directly against a pointer to that separate place,
   161  *    and don't have to wonder "did we move the pivot
   162  *    element?". This makes the inner loop better.
   163  * 5. It's possible to make the pivoting even more
   164  *    reliable by looking at more candidates when n
   165  *    is larger. (Taking this to its logical conclusion
   166  *    results in a variant of quicksort that doesn't
   167  *    have that n^2 worst case.) However, the overhead
   168  *    from the extra bookkeeping means that it's just
   169  *    not worth while.
   170  * 6. This is pretty clean and portable code. Here are
   171  *    all the potential portability pitfalls and problems
   172  *    I know of:
   173  *      - In one place (the insertion sort) I construct
   174  *        a pointer that points just past the end of the
   175  *        supplied array, and assume that (a) it won't
   176  *        compare equal to any pointer within the array,
   177  *        and (b) it will compare equal to a pointer
   178  *        obtained by stepping off the end of the array.
   179  *        These might fail on some segmented architectures.
   180  *      - I assume that there are 8 bits in a |char| when
   181  *        computing the size of stack needed. This would
   182  *        fail on machines with 9-bit or 16-bit bytes.
   183  *      - I assume that if |((int)base&(sizeof(int)-1))==0|
   184  *        and |(size&(sizeof(int)-1))==0| then it's safe to
   185  *        get at array elements via |int*|s, and that if
   186  *        actually |size==sizeof(int)| as well then it's
   187  *        safe to treat the elements as |int|s. This might
   188  *        fail on systems that convert pointers to integers
   189  *        in non-standard ways.
   190  *      - I assume that |8*sizeof(size_t)<=INT_MAX|. This
   191  *        would be false on a machine with 8-bit |char|s,
   192  *        16-bit |int|s and 4096-bit |size_t|s. :-)
   193  */
   194 
   195 /* The recursion logic is the same in each case: */
   196 #define Recurse(Trunc)				\
   197       { size_t l=last-ffirst,r=llast-first;	\
   198         if (l<Trunc) {				\
   199           if (r>=Trunc) doRight			\
   200           else pop				\
   201         }					\
   202         else if (l<=r) { pushLeft; doRight }	\
   203         else if (r>=Trunc) { pushRight; doLeft }\
   204         else doLeft				\
   205       }
   206 
   207 /* and so is the pivoting logic: */
   208 #define Pivot(swapper,sz)			\
   209   if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
   210   else {	\
   211     if (compare(first,mid)<0) {			\
   212       if (compare(mid,last)>0) {		\
   213         swapper(mid,last);			\
   214         if (compare(first,mid)>0) swapper(first,mid);\
   215       }						\
   216     }						\
   217     else {					\
   218       if (compare(mid,last)>0) swapper(first,last)\
   219       else {					\
   220         swapper(first,mid);			\
   221         if (compare(mid,last)>0) swapper(mid,last);\
   222       }						\
   223     }						\
   224     first+=sz; last-=sz;			\
   225   }
   226 
   227 #ifdef DEBUG_QSORT
   228 #include <stdio.h>
   229 #endif
   230 
   231 /* and so is the partitioning logic: */
   232 #define Partition(swapper,sz) {			\
   233   int swapped=0;				\
   234   do {						\
   235     while (compare(first,pivot)<0) first+=sz;	\
   236     while (compare(pivot,last)<0) last-=sz;	\
   237     if (first<last) {				\
   238       swapper(first,last); swapped=1;		\
   239       first+=sz; last-=sz; }			\
   240     else if (first==last) { first+=sz; last-=sz; break; }\
   241   } while (first<=last);			\
   242   if (!swapped) pop				\
   243 }
   244 
   245 /* and so is the pre-insertion-sort operation of putting
   246  * the smallest element into place as a sentinel.
   247  * Doing this makes the inner loop nicer. I got this
   248  * idea from the GNU implementation of qsort().
   249  */
   250 #define PreInsertion(swapper,limit,sz)		\
   251   first=base;					\
   252   last=first + (nmemb>limit ? limit : nmemb-1)*sz;\
   253   while (last!=base) {				\
   254     if (compare(first,last)>0) first=last;	\
   255     last-=sz; }					\
   256   if (first!=base) swapper(first,(char*)base);
   257 
   258 /* and so is the insertion sort, in the first two cases: */
   259 #define Insertion(swapper)			\
   260   last=((char*)base)+nmemb*size;		\
   261   for (first=((char*)base)+size;first!=last;first+=size) {	\
   262     char *test;					\
   263     /* Find the right place for |first|.	\
   264      * My apologies for var reuse. */		\
   265     for (test=first-size;compare(test,first)>0;test-=size) ;	\
   266     test+=size;					\
   267     if (test!=first) {				\
   268       /* Shift everything in [test,first)	\
   269        * up by one, and place |first|		\
   270        * where |test| is. */			\
   271       memcpy(pivot,first,size);			\
   272       memmove(test+size,test,first-test);	\
   273       memcpy(test,pivot,size);			\
   274     }						\
   275   }
   276 
   277 #define SWAP_nonaligned(a,b) { \
   278   register char *aa=(a),*bb=(b); \
   279   register size_t sz=size; \
   280   do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
   281 
   282 #define SWAP_aligned(a,b) { \
   283   register int *aa=(int*)(a),*bb=(int*)(b); \
   284   register size_t sz=size; \
   285   do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
   286 
   287 #define SWAP_words(a,b) { \
   288   register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
   289 
   290 /* ---------------------------------------------------------------------- */
   291 
   292 static char *
   293 pivot_big(char *first, char *mid, char *last, size_t size,
   294           int compare(const void *, const void *))
   295 {
   296     size_t d = (((last - first) / size) >> 3) * size;
   297     char *m1, *m2, *m3;
   298     {
   299         char *a = first, *b = first + d, *c = first + 2 * d;
   300 #ifdef DEBUG_QSORT
   301         fprintf(stderr, "< %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
   302 #endif
   303         m1 = compare(a, b) < 0 ?
   304             (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
   305             : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
   306     }
   307     {
   308         char *a = mid - d, *b = mid, *c = mid + d;
   309 #ifdef DEBUG_QSORT
   310         fprintf(stderr, ". %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
   311 #endif
   312         m2 = compare(a, b) < 0 ?
   313             (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
   314             : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
   315     }
   316     {
   317         char *a = last - 2 * d, *b = last - d, *c = last;
   318 #ifdef DEBUG_QSORT
   319         fprintf(stderr, "> %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
   320 #endif
   321         m3 = compare(a, b) < 0 ?
   322             (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
   323             : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
   324     }
   325 #ifdef DEBUG_QSORT
   326     fprintf(stderr, "-> %d %d %d\n", *(int *) m1, *(int *) m2, *(int *) m3);
   327 #endif
   328     return compare(m1, m2) < 0 ?
   329         (compare(m2, m3) < 0 ? m2 : (compare(m1, m3) < 0 ? m3 : m1))
   330         : (compare(m1, m3) < 0 ? m1 : (compare(m2, m3) < 0 ? m3 : m2));
   331 }
   332 
   333 /* ---------------------------------------------------------------------- */
   334 
   335 static void
   336 qsort_nonaligned(void *base, size_t nmemb, size_t size,
   337                  int (*compare) (const void *, const void *))
   338 {
   339 
   340     stack_entry stack[STACK_SIZE];
   341     int stacktop = 0;
   342     char *first, *last;
   343     char *pivot = malloc(size);
   344     size_t trunc = TRUNC_nonaligned * size;
   345     assert(pivot != 0);
   346 
   347     first = (char *) base;
   348     last = first + (nmemb - 1) * size;
   349 
   350     if ((size_t) (last - first) > trunc) {
   351         char *ffirst = first, *llast = last;
   352         while (1) {
   353             /* Select pivot */
   354             {
   355                 char *mid = first + size * ((last - first) / size >> 1);
   356                 Pivot(SWAP_nonaligned, size);
   357                 memcpy(pivot, mid, size);
   358             }
   359             /* Partition. */
   360             Partition(SWAP_nonaligned, size);
   361             /* Prepare to recurse/iterate. */
   362         Recurse(trunc)}
   363     }
   364     PreInsertion(SWAP_nonaligned, TRUNC_nonaligned, size);
   365     Insertion(SWAP_nonaligned);
   366     free(pivot);
   367 }
   368 
   369 static void
   370 qsort_aligned(void *base, size_t nmemb, size_t size,
   371               int (*compare) (const void *, const void *))
   372 {
   373 
   374     stack_entry stack[STACK_SIZE];
   375     int stacktop = 0;
   376     char *first, *last;
   377     char *pivot = malloc(size);
   378     size_t trunc = TRUNC_aligned * size;
   379     assert(pivot != 0);
   380 
   381     first = (char *) base;
   382     last = first + (nmemb - 1) * size;
   383 
   384     if ((size_t) (last - first) > trunc) {
   385         char *ffirst = first, *llast = last;
   386         while (1) {
   387             /* Select pivot */
   388             {
   389                 char *mid = first + size * ((last - first) / size >> 1);
   390                 Pivot(SWAP_aligned, size);
   391                 memcpy(pivot, mid, size);
   392             }
   393             /* Partition. */
   394             Partition(SWAP_aligned, size);
   395             /* Prepare to recurse/iterate. */
   396         Recurse(trunc)}
   397     }
   398     PreInsertion(SWAP_aligned, TRUNC_aligned, size);
   399     Insertion(SWAP_aligned);
   400     free(pivot);
   401 }
   402 
   403 static void
   404 qsort_words(void *base, size_t nmemb,
   405             int (*compare) (const void *, const void *))
   406 {
   407 
   408     stack_entry stack[STACK_SIZE];
   409     int stacktop = 0;
   410     char *first, *last;
   411     char *pivot = malloc(WORD_BYTES);
   412     assert(pivot != 0);
   413 
   414     first = (char *) base;
   415     last = first + (nmemb - 1) * WORD_BYTES;
   416 
   417     if (last - first > TRUNC_words) {
   418         char *ffirst = first, *llast = last;
   419         while (1) {
   420 #ifdef DEBUG_QSORT
   421             fprintf(stderr, "Doing %d:%d: ",
   422                     (first - (char *) base) / WORD_BYTES,
   423                     (last - (char *) base) / WORD_BYTES);
   424 #endif
   425             /* Select pivot */
   426             {
   427                 char *mid =
   428                     first + WORD_BYTES * ((last - first) / (2 * WORD_BYTES));
   429                 Pivot(SWAP_words, WORD_BYTES);
   430                 *(int *) pivot = *(int *) mid;
   431             }
   432 #ifdef DEBUG_QSORT
   433             fprintf(stderr, "pivot=%d\n", *(int *) pivot);
   434 #endif
   435             /* Partition. */
   436             Partition(SWAP_words, WORD_BYTES);
   437             /* Prepare to recurse/iterate. */
   438         Recurse(TRUNC_words)}
   439     }
   440     PreInsertion(SWAP_words, (TRUNC_words / WORD_BYTES), WORD_BYTES);
   441     /* Now do insertion sort. */
   442     last = ((char *) base) + nmemb * WORD_BYTES;
   443     for (first = ((char *) base) + WORD_BYTES; first != last;
   444          first += WORD_BYTES) {
   445         /* Find the right place for |first|. My apologies for var reuse */
   446         int *pl = (int *) (first - WORD_BYTES), *pr = (int *) first;
   447         *(int *) pivot = *(int *) first;
   448         for (; compare(pl, pivot) > 0; pr = pl, --pl) {
   449             *pr = *pl;
   450         }
   451         if (pr != (int *) first)
   452             *pr = *(int *) pivot;
   453     }
   454     free(pivot);
   455 }
   456 
   457 /* ---------------------------------------------------------------------- */
   458 
   459 void
   460 qsort(void *base, size_t nmemb, size_t size,
   461       int (*compare) (const void *, const void *))
   462 {
   463 
   464     if (nmemb <= 1)
   465         return;
   466     if (((uintptr_t) base | size) & (WORD_BYTES - 1))
   467         qsort_nonaligned(base, nmemb, size, compare);
   468     else if (size != WORD_BYTES)
   469         qsort_aligned(base, nmemb, size, compare);
   470     else
   471         qsort_words(base, nmemb, compare);
   472 }
   473 
   474 #endif /* !SDL_qsort */
   475 
   476 /* vi: set ts=4 sw=4 expandtab: */