src/stdlib/SDL_qsort.c
author Ryan C. Gordon <icculus@icculus.org>
Wed, 30 Mar 2016 13:30:59 -0400
branchSDL-1.2
changeset 10133 b1d2398f2655
parent 10131 8ab0633c9d43
permissions -rw-r--r--
1.2 stdlib: include SDL_config.h so we can see if HAVE_QSORT is actually set.
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2016 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 
    22 #include "SDL_config.h"
    23 
    24 #ifndef HAVE_QSORT
    25 
    26 #ifdef assert
    27 #undef assert
    28 #endif
    29 #define assert(X)
    30 #ifdef malloc
    31 #undef malloc
    32 #endif
    33 #define malloc SDL_malloc
    34 #ifdef free
    35 #undef free
    36 #endif
    37 #define free SDL_free
    38 #ifdef memcpy
    39 #undef memcpy
    40 #endif
    41 #define memcpy SDL_memcpy
    42 #ifdef memmove
    43 #undef memmove
    44 #endif
    45 #define memmove SDL_memmove
    46 #ifdef qsortG
    47 #undef qsortG
    48 #endif
    49 #define qsortG SDL_qsort
    50 
    51 /*
    52 This code came from Gareth McCaughan, under the zlib license.
    53 Specifically this: https://www.mccaughan.org.uk/software/qsort.c-1.14
    54 
    55 Everything below this comment until the HAVE_QSORT #endif was from Gareth
    56 (any minor changes will be noted inline).
    57 
    58 Thank you to Gareth for relicensing this code under the zlib license for our
    59 benefit!
    60 
    61 --ryan.
    62 */
    63 
    64 /* This is a drop-in replacement for the C library's |qsort()| routine.
    65  *
    66  * It is intended for use where you know or suspect that your
    67  * platform's qsort is bad. If that isn't the case, then you
    68  * should probably use the qsort your system gives you in preference
    69  * to mine -- it will likely have been tested and tuned better.
    70  *
    71  * Features:
    72  *   - Median-of-three pivoting (and more)
    73  *   - Truncation and final polishing by a single insertion sort
    74  *   - Early truncation when no swaps needed in pivoting step
    75  *   - Explicit recursion, guaranteed not to overflow
    76  *   - A few little wrinkles stolen from the GNU |qsort()|.
    77  *     (For the avoidance of doubt, no code was stolen, only
    78  *     broad ideas.)
    79  *   - separate code for non-aligned / aligned / word-size objects
    80  *
    81  * Earlier releases of this code used an idiosyncratic licence
    82  * I wrote myself, because I'm an idiot. The code is now released
    83  * under the "zlib/libpng licence"; you will find the actual
    84  * terms in the next comment. I request (but do not require)
    85  * that if you make any changes beyond the name of the exported
    86  * routine and reasonable tweaks to the TRUNC_* and
    87  * PIVOT_THRESHOLD values, you modify the _ID string so as
    88  * to make it clear that you have changed the code.
    89  *
    90  * If you find problems with this code, or find ways of
    91  * making it significantly faster, please let me know!
    92  * My e-mail address, valid as of early 2016 and for the
    93  * foreseeable future, is
    94  *    gareth.mccaughan@pobox.com
    95  * Thanks!
    96  *
    97  * Gareth McCaughan
    98  */
    99 
   100 /* Copyright (c) 1998-2016 Gareth McCaughan
   101  *
   102  * This software is provided 'as-is', without any express or implied
   103  * warranty. In no event will the authors be held liable for any
   104  * damages arising from the use of this software.
   105  *
   106  * Permission is granted to anyone to use this software for any purpose,
   107  * including commercial applications, and to alter it and redistribute it
   108  * freely, subject to the following restrictions:
   109  *
   110  * 1. The origin of this software must not be misrepresented;
   111  *    you must not claim that you wrote the original software.
   112  *    If you use this software in a product, an acknowledgment
   113  *    in the product documentation would be appreciated but
   114  *    is not required.
   115  *
   116  * 2. Altered source versions must be plainly marked as such,
   117  *    and must not be misrepresented as being the original software.
   118  *
   119  * 3. This notice may not be removed or altered from any source
   120  *    distribution.
   121  */
   122 
   123 /* Revision history since release:
   124  *   1998-03-19 v1.12 First release I have any records of.
   125  *   2007-09-02 v1.13 Fix bug kindly reported by Dan Bodoh
   126  *                    (premature termination of recursion).
   127  *                    Add a few clarifying comments.
   128  *                    Minor improvements to debug output.
   129  *   2016-02-21 v1.14 Replace licence with 2-clause BSD,
   130  *                    and clarify a couple of things in
   131  *                    comments. No code changes.
   132  */
   133 
   134 #include <assert.h>
   135 #include <stdlib.h>
   136 #include <string.h>
   137 
   138 /* BEGIN SDL CHANGE ... commented this out with an #if 0 block. --ryan. */
   139 #if 0
   140 #define DEBUG_QSORT
   141 
   142 static char _ID[]="<qsort.c gjm 1.14 2016-02-21>";
   143 #endif
   144 /* END SDL CHANGE ... commented this out with an #if 0 block. --ryan. */
   145 
   146 /* How many bytes are there per word? (Must be a power of 2,
   147  * and must in fact equal sizeof(int).)
   148  */
   149 #define WORD_BYTES sizeof(int)
   150 
   151 /* How big does our stack need to be? Answer: one entry per
   152  * bit in a |size_t|.
   153  */
   154 #define STACK_SIZE (8*sizeof(size_t))
   155 
   156 /* Different situations have slightly different requirements,
   157  * and we make life epsilon easier by using different truncation
   158  * points for the three different cases.
   159  * So far, I have tuned TRUNC_words and guessed that the same
   160  * value might work well for the other two cases. Of course
   161  * what works well on my machine might work badly on yours.
   162  */
   163 #define TRUNC_nonaligned	12
   164 #define TRUNC_aligned		12
   165 #define TRUNC_words		12*WORD_BYTES	/* nb different meaning */
   166 
   167 /* We use a simple pivoting algorithm for shortish sub-arrays
   168  * and a more complicated one for larger ones. The threshold
   169  * is PIVOT_THRESHOLD.
   170  */
   171 #define PIVOT_THRESHOLD 40
   172 
   173 typedef struct { char * first; char * last; } stack_entry;
   174 #define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
   175 #define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
   176 #define doLeft {first=ffirst;llast=last;continue;}
   177 #define doRight {ffirst=first;last=llast;continue;}
   178 #define pop {if (--stacktop<0) break;\
   179   first=ffirst=stack[stacktop].first;\
   180   last=llast=stack[stacktop].last;\
   181   continue;}
   182 
   183 /* Some comments on the implementation.
   184  * 1. When we finish partitioning the array into "low"
   185  *    and "high", we forget entirely about short subarrays,
   186  *    because they'll be done later by insertion sort.
   187  *    Doing lots of little insertion sorts might be a win
   188  *    on large datasets for locality-of-reference reasons,
   189  *    but it makes the code much nastier and increases
   190  *    bookkeeping overhead.
   191  * 2. We always save the shorter and get to work on the
   192  *    longer. This guarantees that every time we push
   193  *    an item onto the stack its size is <= 1/2 of that
   194  *    of its parent; so the stack can't need more than
   195  *    log_2(max-array-size) entries.
   196  * 3. We choose a pivot by looking at the first, last
   197  *    and middle elements. We arrange them into order
   198  *    because it's easy to do that in conjunction with
   199  *    choosing the pivot, and it makes things a little
   200  *    easier in the partitioning step. Anyway, the pivot
   201  *    is the middle of these three. It's still possible
   202  *    to construct datasets where the algorithm takes
   203  *    time of order n^2, but it simply never happens in
   204  *    practice.
   205  * 3' Newsflash: On further investigation I find that
   206  *    it's easy to construct datasets where median-of-3
   207  *    simply isn't good enough. So on large-ish subarrays
   208  *    we do a more sophisticated pivoting: we take three
   209  *    sets of 3 elements, find their medians, and then
   210  *    take the median of those.
   211  * 4. We copy the pivot element to a separate place
   212  *    because that way we can always do our comparisons
   213  *    directly against a pointer to that separate place,
   214  *    and don't have to wonder "did we move the pivot
   215  *    element?". This makes the inner loop better.
   216  * 5. It's possible to make the pivoting even more
   217  *    reliable by looking at more candidates when n
   218  *    is larger. (Taking this to its logical conclusion
   219  *    results in a variant of quicksort that doesn't
   220  *    have that n^2 worst case.) However, the overhead
   221  *    from the extra bookkeeping means that it's just
   222  *    not worth while.
   223  * 6. This is pretty clean and portable code. Here are
   224  *    all the potential portability pitfalls and problems
   225  *    I know of:
   226  *      - In one place (the insertion sort) I construct
   227  *        a pointer that points just past the end of the
   228  *        supplied array, and assume that (a) it won't
   229  *        compare equal to any pointer within the array,
   230  *        and (b) it will compare equal to a pointer
   231  *        obtained by stepping off the end of the array.
   232  *        These might fail on some segmented architectures.
   233  *      - I assume that there are 8 bits in a |char| when
   234  *        computing the size of stack needed. This would
   235  *        fail on machines with 9-bit or 16-bit bytes.
   236  *      - I assume that if |((int)base&(sizeof(int)-1))==0|
   237  *        and |(size&(sizeof(int)-1))==0| then it's safe to
   238  *        get at array elements via |int*|s, and that if
   239  *        actually |size==sizeof(int)| as well then it's
   240  *        safe to treat the elements as |int|s. This might
   241  *        fail on systems that convert pointers to integers
   242  *        in non-standard ways.
   243  *      - I assume that |8*sizeof(size_t)<=INT_MAX|. This
   244  *        would be false on a machine with 8-bit |char|s,
   245  *        16-bit |int|s and 4096-bit |size_t|s. :-)
   246  */
   247 
   248 /* The recursion logic is the same in each case.
   249  * We keep chopping up until we reach subarrays of size
   250  * strictly less than Trunc; we leave these unsorted. */
   251 #define Recurse(Trunc)				\
   252       { size_t l=last-ffirst,r=llast-first;	\
   253         if (l<Trunc) {				\
   254           if (r>=Trunc) doRight			\
   255           else pop				\
   256         }					\
   257         else if (l<=r) { pushLeft; doRight }	\
   258         else if (r>=Trunc) { pushRight; doLeft }\
   259         else doLeft				\
   260       }
   261 
   262 /* and so is the pivoting logic (note: last is inclusive): */
   263 #define Pivot(swapper,sz)			\
   264   if (last-first>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
   265   else {	\
   266     if (compare(first,mid)<0) {			\
   267       if (compare(mid,last)>0) {		\
   268         swapper(mid,last);			\
   269         if (compare(first,mid)>0) swapper(first,mid);\
   270       }						\
   271     }						\
   272     else {					\
   273       if (compare(mid,last)>0) swapper(first,last)\
   274       else {					\
   275         swapper(first,mid);			\
   276         if (compare(mid,last)>0) swapper(mid,last);\
   277       }						\
   278     }						\
   279     first+=sz; last-=sz;			\
   280   }
   281 
   282 #ifdef DEBUG_QSORT
   283 #include <stdio.h>
   284 #endif
   285 
   286 /* and so is the partitioning logic: */
   287 #define Partition(swapper,sz) {			\
   288   do {						\
   289     while (compare(first,pivot)<0) first+=sz;	\
   290     while (compare(pivot,last)<0) last-=sz;	\
   291     if (first<last) {				\
   292       swapper(first,last);			\
   293       first+=sz; last-=sz; }			\
   294     else if (first==last) { first+=sz; last-=sz; break; }\
   295   } while (first<=last);			\
   296 }
   297 
   298 /* and so is the pre-insertion-sort operation of putting
   299  * the smallest element into place as a sentinel.
   300  * Doing this makes the inner loop nicer. I got this
   301  * idea from the GNU implementation of qsort().
   302  * We find the smallest element from the first |nmemb|,
   303  * or the first |limit|, whichever is smaller;
   304  * therefore we must have ensured that the globally smallest
   305  * element is in the first |limit|.
   306  */
   307 #define PreInsertion(swapper,limit,sz)		\
   308   first=base;					\
   309   last=first + ((nmemb>limit ? limit : nmemb)-1)*sz;\
   310   while (last!=base) {				\
   311     if (compare(first,last)>0) first=last;	\
   312     last-=sz; }					\
   313   if (first!=base) swapper(first,(char*)base);
   314 
   315 /* and so is the insertion sort, in the first two cases: */
   316 #define Insertion(swapper)			\
   317   last=((char*)base)+nmemb*size;		\
   318   for (first=((char*)base)+size;first!=last;first+=size) {	\
   319     char *test;					\
   320     /* Find the right place for |first|.	\
   321      * My apologies for var reuse. */		\
   322     for (test=first-size;compare(test,first)>0;test-=size) ;	\
   323     test+=size;					\
   324     if (test!=first) {				\
   325       /* Shift everything in [test,first)	\
   326        * up by one, and place |first|		\
   327        * where |test| is. */			\
   328       memcpy(pivot,first,size);			\
   329       memmove(test+size,test,first-test);	\
   330       memcpy(test,pivot,size);			\
   331     }						\
   332   }
   333 
   334 #define SWAP_nonaligned(a,b) { \
   335   register char *aa=(a),*bb=(b); \
   336   register size_t sz=size; \
   337   do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
   338 
   339 #define SWAP_aligned(a,b) { \
   340   register int *aa=(int*)(a),*bb=(int*)(b); \
   341   register size_t sz=size; \
   342   do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
   343 
   344 #define SWAP_words(a,b) { \
   345   register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
   346 
   347 /* ---------------------------------------------------------------------- */
   348 
   349 static char * pivot_big(char *first, char *mid, char *last, size_t size,
   350                         int compare(const void *, const void *)) {
   351   int d=(((last-first)/size)>>3)*size;
   352 #ifdef DEBUG_QSORT
   353 fprintf(stderr, "pivot_big: first=%p last=%p size=%lu n=%lu\n", first, (unsigned long)last, size, (unsigned long)((last-first+1)/size));
   354 #endif
   355   char *m1,*m2,*m3;
   356   { char *a=first, *b=first+d, *c=first+2*d;
   357 #ifdef DEBUG_QSORT
   358 fprintf(stderr,"< %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
   359 #endif
   360     m1 = compare(a,b)<0 ?
   361            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
   362          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
   363   }
   364   { char *a=mid-d, *b=mid, *c=mid+d;
   365 #ifdef DEBUG_QSORT
   366 fprintf(stderr,". %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
   367 #endif
   368     m2 = compare(a,b)<0 ?
   369            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
   370          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
   371   }
   372   { char *a=last-2*d, *b=last-d, *c=last;
   373 #ifdef DEBUG_QSORT
   374 fprintf(stderr,"> %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
   375 #endif
   376     m3 = compare(a,b)<0 ?
   377            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
   378          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
   379   }
   380 #ifdef DEBUG_QSORT
   381 fprintf(stderr,"-> %d %d %d @ %p %p %p\n",*(int*)m1,*(int*)m2,*(int*)m3, m1,m2,m3);
   382 #endif
   383   return compare(m1,m2)<0 ?
   384            (compare(m2,m3)<0 ? m2 : (compare(m1,m3)<0 ? m3 : m1))
   385          : (compare(m1,m3)<0 ? m1 : (compare(m2,m3)<0 ? m3 : m2));
   386 }
   387 
   388 /* ---------------------------------------------------------------------- */
   389 
   390 static void qsort_nonaligned(void *base, size_t nmemb, size_t size,
   391            int (*compare)(const void *, const void *)) {
   392 
   393   stack_entry stack[STACK_SIZE];
   394   int stacktop=0;
   395   char *first,*last;
   396   char *pivot=malloc(size);
   397   size_t trunc=TRUNC_nonaligned*size;
   398   assert(pivot!=0);
   399 
   400   first=(char*)base; last=first+(nmemb-1)*size;
   401 
   402   if (last-first>=trunc) {
   403     char *ffirst=first, *llast=last;
   404     while (1) {
   405       /* Select pivot */
   406       { char * mid=first+size*((last-first)/size >> 1);
   407         Pivot(SWAP_nonaligned,size);
   408         memcpy(pivot,mid,size);
   409       }
   410       /* Partition. */
   411       Partition(SWAP_nonaligned,size);
   412       /* Prepare to recurse/iterate. */
   413       Recurse(trunc)
   414     }
   415   }
   416   PreInsertion(SWAP_nonaligned,TRUNC_nonaligned,size);
   417   Insertion(SWAP_nonaligned);
   418   free(pivot);
   419 }
   420 
   421 static void qsort_aligned(void *base, size_t nmemb, size_t size,
   422            int (*compare)(const void *, const void *)) {
   423 
   424   stack_entry stack[STACK_SIZE];
   425   int stacktop=0;
   426   char *first,*last;
   427   char *pivot=malloc(size);
   428   size_t trunc=TRUNC_aligned*size;
   429   assert(pivot!=0);
   430 
   431   first=(char*)base; last=first+(nmemb-1)*size;
   432 
   433   if (last-first>=trunc) {
   434     char *ffirst=first,*llast=last;
   435     while (1) {
   436       /* Select pivot */
   437       { char * mid=first+size*((last-first)/size >> 1);
   438         Pivot(SWAP_aligned,size);
   439         memcpy(pivot,mid,size);
   440       }
   441       /* Partition. */
   442       Partition(SWAP_aligned,size);
   443       /* Prepare to recurse/iterate. */
   444       Recurse(trunc)
   445     }
   446   }
   447   PreInsertion(SWAP_aligned,TRUNC_aligned,size);
   448   Insertion(SWAP_aligned);
   449   free(pivot);
   450 }
   451 
   452 static void qsort_words(void *base, size_t nmemb,
   453            int (*compare)(const void *, const void *)) {
   454 
   455   stack_entry stack[STACK_SIZE];
   456   int stacktop=0;
   457   char *first,*last;
   458   char *pivot=malloc(WORD_BYTES);
   459   assert(pivot!=0);
   460 
   461   first=(char*)base; last=first+(nmemb-1)*WORD_BYTES;
   462 
   463   if (last-first>=TRUNC_words) {
   464     char *ffirst=first, *llast=last;
   465     while (1) {
   466 #ifdef DEBUG_QSORT
   467 fprintf(stderr,"Doing %d:%d: ",
   468         (first-(char*)base)/WORD_BYTES,
   469         (last-(char*)base)/WORD_BYTES);
   470 #endif
   471       /* Select pivot */
   472       { char * mid=first+WORD_BYTES*((last-first) / (2*WORD_BYTES));
   473         Pivot(SWAP_words,WORD_BYTES);
   474         *(int*)pivot=*(int*)mid;
   475 #ifdef DEBUG_QSORT
   476 fprintf(stderr,"pivot = %p = #%lu = %d\n", mid, (unsigned long)(((int*)mid)-((int*)base)), *(int*)mid);
   477 #endif
   478       }
   479       /* Partition. */
   480       Partition(SWAP_words,WORD_BYTES);
   481 #ifdef DEBUG_QSORT
   482 fprintf(stderr, "after partitioning first=#%lu last=#%lu\n", (first-(char*)base)/4lu, (last-(char*)base)/4lu);
   483 #endif
   484       /* Prepare to recurse/iterate. */
   485       Recurse(TRUNC_words)
   486     }
   487   }
   488   PreInsertion(SWAP_words,(TRUNC_words/WORD_BYTES),WORD_BYTES);
   489   /* Now do insertion sort. */
   490   last=((char*)base)+nmemb*WORD_BYTES;
   491   for (first=((char*)base)+WORD_BYTES;first!=last;first+=WORD_BYTES) {
   492     /* Find the right place for |first|. My apologies for var reuse */
   493     int *pl=(int*)(first-WORD_BYTES),*pr=(int*)first;
   494     *(int*)pivot=*(int*)first;
   495     for (;compare(pl,pivot)>0;pr=pl,--pl) {
   496       *pr=*pl; }
   497     if (pr!=(int*)first) *pr=*(int*)pivot;
   498   }
   499   free(pivot);
   500 }
   501 
   502 /* ---------------------------------------------------------------------- */
   503 
   504 extern void qsortG(void *base, size_t nmemb, size_t size,
   505            int (*compare)(const void *, const void *)) {
   506 
   507   if (nmemb<=1) return;
   508   if (((int)base|size)&(WORD_BYTES-1))
   509     qsort_nonaligned(base,nmemb,size,compare);
   510   else if (size!=WORD_BYTES)
   511     qsort_aligned(base,nmemb,size,compare);
   512   else
   513     qsort_words(base,nmemb,compare);
   514 }
   515 
   516 #endif /* ifndef HAVE_QSORT */
   517 
   518 /* vi: set ts=4 sw=4 expandtab: */
   519