src/stdlib/SDL_qsort.c
author Sam Lantinga <slouken@libsdl.org>
Sun, 01 Jan 2017 18:33:28 -0800
changeset 10737 3406a0f8b041
parent 10599 ade40eb390dc
child 11416 7e7d8a125d6a
permissions -rw-r--r--
Updated copyright for 2017
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2017 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 
    22 #if defined(__clang_analyzer__) && !defined(SDL_DISABLE_ANALYZE_MACROS)
    23 #define SDL_DISABLE_ANALYZE_MACROS 1
    24 #endif
    25 
    26 #include "../SDL_internal.h"
    27 
    28 #include "SDL_stdinc.h"
    29 #include "SDL_assert.h"
    30 
    31 #if defined(HAVE_QSORT)
    32 void
    33 SDL_qsort(void *base, size_t nmemb, size_t size, int (*compare) (const void *, const void *))
    34 {
    35     qsort(base, nmemb, size, compare);
    36 }
    37 
    38 #else
    39 
    40 #ifdef assert
    41 #undef assert
    42 #endif
    43 #define assert SDL_assert
    44 #ifdef malloc
    45 #undef malloc
    46 #endif
    47 #define malloc SDL_malloc
    48 #ifdef free
    49 #undef free
    50 #endif
    51 #define free SDL_free
    52 #ifdef memcpy
    53 #undef memcpy
    54 #endif
    55 #define memcpy SDL_memcpy
    56 #ifdef memmove
    57 #undef memmove
    58 #endif
    59 #define memmove SDL_memmove
    60 #ifdef qsortG
    61 #undef qsortG
    62 #endif
    63 #define qsortG SDL_qsort
    64 
    65 /*
    66 This code came from Gareth McCaughan, under the zlib license.
    67 Specifically this: https://www.mccaughan.org.uk/software/qsort.c-1.14
    68 
    69 Everything below this comment until the HAVE_QSORT #endif was from Gareth
    70 (any minor changes will be noted inline).
    71 
    72 Thank you to Gareth for relicensing this code under the zlib license for our
    73 benefit!
    74 
    75 --ryan.
    76 */
    77 
    78 /* This is a drop-in replacement for the C library's |qsort()| routine.
    79  *
    80  * It is intended for use where you know or suspect that your
    81  * platform's qsort is bad. If that isn't the case, then you
    82  * should probably use the qsort your system gives you in preference
    83  * to mine -- it will likely have been tested and tuned better.
    84  *
    85  * Features:
    86  *   - Median-of-three pivoting (and more)
    87  *   - Truncation and final polishing by a single insertion sort
    88  *   - Early truncation when no swaps needed in pivoting step
    89  *   - Explicit recursion, guaranteed not to overflow
    90  *   - A few little wrinkles stolen from the GNU |qsort()|.
    91  *     (For the avoidance of doubt, no code was stolen, only
    92  *     broad ideas.)
    93  *   - separate code for non-aligned / aligned / word-size objects
    94  *
    95  * Earlier releases of this code used an idiosyncratic licence
    96  * I wrote myself, because I'm an idiot. The code is now released
    97  * under the "zlib/libpng licence"; you will find the actual
    98  * terms in the next comment. I request (but do not require)
    99  * that if you make any changes beyond the name of the exported
   100  * routine and reasonable tweaks to the TRUNC_* and
   101  * PIVOT_THRESHOLD values, you modify the _ID string so as
   102  * to make it clear that you have changed the code.
   103  *
   104  * If you find problems with this code, or find ways of
   105  * making it significantly faster, please let me know!
   106  * My e-mail address, valid as of early 2016 and for the
   107  * foreseeable future, is
   108  *    gareth.mccaughan@pobox.com
   109  * Thanks!
   110  *
   111  * Gareth McCaughan
   112  */
   113 
   114 /* Copyright (c) 1998-2016 Gareth McCaughan
   115  *
   116  * This software is provided 'as-is', without any express or implied
   117  * warranty. In no event will the authors be held liable for any
   118  * damages arising from the use of this software.
   119  *
   120  * Permission is granted to anyone to use this software for any purpose,
   121  * including commercial applications, and to alter it and redistribute it
   122  * freely, subject to the following restrictions:
   123  *
   124  * 1. The origin of this software must not be misrepresented;
   125  *    you must not claim that you wrote the original software.
   126  *    If you use this software in a product, an acknowledgment
   127  *    in the product documentation would be appreciated but
   128  *    is not required.
   129  *
   130  * 2. Altered source versions must be plainly marked as such,
   131  *    and must not be misrepresented as being the original software.
   132  *
   133  * 3. This notice may not be removed or altered from any source
   134  *    distribution.
   135  */
   136 
   137 /* Revision history since release:
   138  *   1998-03-19 v1.12 First release I have any records of.
   139  *   2007-09-02 v1.13 Fix bug kindly reported by Dan Bodoh
   140  *                    (premature termination of recursion).
   141  *                    Add a few clarifying comments.
   142  *                    Minor improvements to debug output.
   143  *   2016-02-21 v1.14 Replace licence with 2-clause BSD,
   144  *                    and clarify a couple of things in
   145  *                    comments. No code changes.
   146  */
   147 
   148 /* BEGIN SDL CHANGE ... commented this out with an #if 0 block. --ryan. */
   149 #if 0
   150 #include <assert.h>
   151 #include <stdlib.h>
   152 #include <string.h>
   153 
   154 #define DEBUG_QSORT
   155 
   156 static char _ID[]="<qsort.c gjm 1.14 2016-02-21>";
   157 #endif
   158 /* END SDL CHANGE ... commented this out with an #if 0 block. --ryan. */
   159 
   160 /* How many bytes are there per word? (Must be a power of 2,
   161  * and must in fact equal sizeof(int).)
   162  */
   163 #define WORD_BYTES sizeof(int)
   164 
   165 /* How big does our stack need to be? Answer: one entry per
   166  * bit in a |size_t|.
   167  */
   168 #define STACK_SIZE (8*sizeof(size_t))
   169 
   170 /* Different situations have slightly different requirements,
   171  * and we make life epsilon easier by using different truncation
   172  * points for the three different cases.
   173  * So far, I have tuned TRUNC_words and guessed that the same
   174  * value might work well for the other two cases. Of course
   175  * what works well on my machine might work badly on yours.
   176  */
   177 #define TRUNC_nonaligned	12
   178 #define TRUNC_aligned		12
   179 #define TRUNC_words		12*WORD_BYTES	/* nb different meaning */
   180 
   181 /* We use a simple pivoting algorithm for shortish sub-arrays
   182  * and a more complicated one for larger ones. The threshold
   183  * is PIVOT_THRESHOLD.
   184  */
   185 #define PIVOT_THRESHOLD 40
   186 
   187 typedef struct { char * first; char * last; } stack_entry;
   188 #define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
   189 #define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
   190 #define doLeft {first=ffirst;llast=last;continue;}
   191 #define doRight {ffirst=first;last=llast;continue;}
   192 #define pop {if (--stacktop<0) break;\
   193   first=ffirst=stack[stacktop].first;\
   194   last=llast=stack[stacktop].last;\
   195   continue;}
   196 
   197 /* Some comments on the implementation.
   198  * 1. When we finish partitioning the array into "low"
   199  *    and "high", we forget entirely about short subarrays,
   200  *    because they'll be done later by insertion sort.
   201  *    Doing lots of little insertion sorts might be a win
   202  *    on large datasets for locality-of-reference reasons,
   203  *    but it makes the code much nastier and increases
   204  *    bookkeeping overhead.
   205  * 2. We always save the shorter and get to work on the
   206  *    longer. This guarantees that every time we push
   207  *    an item onto the stack its size is <= 1/2 of that
   208  *    of its parent; so the stack can't need more than
   209  *    log_2(max-array-size) entries.
   210  * 3. We choose a pivot by looking at the first, last
   211  *    and middle elements. We arrange them into order
   212  *    because it's easy to do that in conjunction with
   213  *    choosing the pivot, and it makes things a little
   214  *    easier in the partitioning step. Anyway, the pivot
   215  *    is the middle of these three. It's still possible
   216  *    to construct datasets where the algorithm takes
   217  *    time of order n^2, but it simply never happens in
   218  *    practice.
   219  * 3' Newsflash: On further investigation I find that
   220  *    it's easy to construct datasets where median-of-3
   221  *    simply isn't good enough. So on large-ish subarrays
   222  *    we do a more sophisticated pivoting: we take three
   223  *    sets of 3 elements, find their medians, and then
   224  *    take the median of those.
   225  * 4. We copy the pivot element to a separate place
   226  *    because that way we can always do our comparisons
   227  *    directly against a pointer to that separate place,
   228  *    and don't have to wonder "did we move the pivot
   229  *    element?". This makes the inner loop better.
   230  * 5. It's possible to make the pivoting even more
   231  *    reliable by looking at more candidates when n
   232  *    is larger. (Taking this to its logical conclusion
   233  *    results in a variant of quicksort that doesn't
   234  *    have that n^2 worst case.) However, the overhead
   235  *    from the extra bookkeeping means that it's just
   236  *    not worth while.
   237  * 6. This is pretty clean and portable code. Here are
   238  *    all the potential portability pitfalls and problems
   239  *    I know of:
   240  *      - In one place (the insertion sort) I construct
   241  *        a pointer that points just past the end of the
   242  *        supplied array, and assume that (a) it won't
   243  *        compare equal to any pointer within the array,
   244  *        and (b) it will compare equal to a pointer
   245  *        obtained by stepping off the end of the array.
   246  *        These might fail on some segmented architectures.
   247  *      - I assume that there are 8 bits in a |char| when
   248  *        computing the size of stack needed. This would
   249  *        fail on machines with 9-bit or 16-bit bytes.
   250  *      - I assume that if |((int)base&(sizeof(int)-1))==0|
   251  *        and |(size&(sizeof(int)-1))==0| then it's safe to
   252  *        get at array elements via |int*|s, and that if
   253  *        actually |size==sizeof(int)| as well then it's
   254  *        safe to treat the elements as |int|s. This might
   255  *        fail on systems that convert pointers to integers
   256  *        in non-standard ways.
   257  *      - I assume that |8*sizeof(size_t)<=INT_MAX|. This
   258  *        would be false on a machine with 8-bit |char|s,
   259  *        16-bit |int|s and 4096-bit |size_t|s. :-)
   260  */
   261 
   262 /* The recursion logic is the same in each case.
   263  * We keep chopping up until we reach subarrays of size
   264  * strictly less than Trunc; we leave these unsorted. */
   265 #define Recurse(Trunc)				\
   266       { size_t l=last-ffirst,r=llast-first;	\
   267         if (l<Trunc) {				\
   268           if (r>=Trunc) doRight			\
   269           else pop				\
   270         }					\
   271         else if (l<=r) { pushLeft; doRight }	\
   272         else if (r>=Trunc) { pushRight; doLeft }\
   273         else doLeft				\
   274       }
   275 
   276 /* and so is the pivoting logic (note: last is inclusive): */
   277 #define Pivot(swapper,sz)			\
   278   if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
   279   else {	\
   280     if (compare(first,mid)<0) {			\
   281       if (compare(mid,last)>0) {		\
   282         swapper(mid,last);			\
   283         if (compare(first,mid)>0) swapper(first,mid);\
   284       }						\
   285     }						\
   286     else {					\
   287       if (compare(mid,last)>0) swapper(first,last)\
   288       else {					\
   289         swapper(first,mid);			\
   290         if (compare(mid,last)>0) swapper(mid,last);\
   291       }						\
   292     }						\
   293     first+=sz; last-=sz;			\
   294   }
   295 
   296 #ifdef DEBUG_QSORT
   297 #include <stdio.h>
   298 #endif
   299 
   300 /* and so is the partitioning logic: */
   301 #define Partition(swapper,sz) {			\
   302   do {						\
   303     while (compare(first,pivot)<0) first+=sz;	\
   304     while (compare(pivot,last)<0) last-=sz;	\
   305     if (first<last) {				\
   306       swapper(first,last);			\
   307       first+=sz; last-=sz; }			\
   308     else if (first==last) { first+=sz; last-=sz; break; }\
   309   } while (first<=last);			\
   310 }
   311 
   312 /* and so is the pre-insertion-sort operation of putting
   313  * the smallest element into place as a sentinel.
   314  * Doing this makes the inner loop nicer. I got this
   315  * idea from the GNU implementation of qsort().
   316  * We find the smallest element from the first |nmemb|,
   317  * or the first |limit|, whichever is smaller;
   318  * therefore we must have ensured that the globally smallest
   319  * element is in the first |limit|.
   320  */
   321 #define PreInsertion(swapper,limit,sz)		\
   322   first=base;					\
   323   last=first + ((nmemb>limit ? limit : nmemb)-1)*sz;\
   324   while (last!=base) {				\
   325     if (compare(first,last)>0) first=last;	\
   326     last-=sz; }					\
   327   if (first!=base) swapper(first,(char*)base);
   328 
   329 /* and so is the insertion sort, in the first two cases: */
   330 #define Insertion(swapper)			\
   331   last=((char*)base)+nmemb*size;		\
   332   for (first=((char*)base)+size;first!=last;first+=size) {	\
   333     char *test;					\
   334     /* Find the right place for |first|.	\
   335      * My apologies for var reuse. */		\
   336     for (test=first-size;compare(test,first)>0;test-=size) ;	\
   337     test+=size;					\
   338     if (test!=first) {				\
   339       /* Shift everything in [test,first)	\
   340        * up by one, and place |first|		\
   341        * where |test| is. */			\
   342       memcpy(pivot,first,size);			\
   343       memmove(test+size,test,first-test);	\
   344       memcpy(test,pivot,size);			\
   345     }						\
   346   }
   347 
   348 #define SWAP_nonaligned(a,b) { \
   349   register char *aa=(a),*bb=(b); \
   350   register size_t sz=size; \
   351   do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
   352 
   353 #define SWAP_aligned(a,b) { \
   354   register int *aa=(int*)(a),*bb=(int*)(b); \
   355   register size_t sz=size; \
   356   do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
   357 
   358 #define SWAP_words(a,b) { \
   359   register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
   360 
   361 /* ---------------------------------------------------------------------- */
   362 
   363 static char * pivot_big(char *first, char *mid, char *last, size_t size,
   364                         int compare(const void *, const void *)) {
   365   int d=(((last-first)/size)>>3)*size;
   366 #ifdef DEBUG_QSORT
   367 fprintf(stderr, "pivot_big: first=%p last=%p size=%lu n=%lu\n", first, (unsigned long)last, size, (unsigned long)((last-first+1)/size));
   368 #endif
   369   char *m1,*m2,*m3;
   370   { char *a=first, *b=first+d, *c=first+2*d;
   371 #ifdef DEBUG_QSORT
   372 fprintf(stderr,"< %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
   373 #endif
   374     m1 = compare(a,b)<0 ?
   375            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
   376          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
   377   }
   378   { char *a=mid-d, *b=mid, *c=mid+d;
   379 #ifdef DEBUG_QSORT
   380 fprintf(stderr,". %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
   381 #endif
   382     m2 = compare(a,b)<0 ?
   383            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
   384          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
   385   }
   386   { char *a=last-2*d, *b=last-d, *c=last;
   387 #ifdef DEBUG_QSORT
   388 fprintf(stderr,"> %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
   389 #endif
   390     m3 = compare(a,b)<0 ?
   391            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
   392          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
   393   }
   394 #ifdef DEBUG_QSORT
   395 fprintf(stderr,"-> %d %d %d @ %p %p %p\n",*(int*)m1,*(int*)m2,*(int*)m3, m1,m2,m3);
   396 #endif
   397   return compare(m1,m2)<0 ?
   398            (compare(m2,m3)<0 ? m2 : (compare(m1,m3)<0 ? m3 : m1))
   399          : (compare(m1,m3)<0 ? m1 : (compare(m2,m3)<0 ? m3 : m2));
   400 }
   401 
   402 /* ---------------------------------------------------------------------- */
   403 
   404 static void qsort_nonaligned(void *base, size_t nmemb, size_t size,
   405            int (*compare)(const void *, const void *)) {
   406 
   407   stack_entry stack[STACK_SIZE];
   408   int stacktop=0;
   409   char *first,*last;
   410   char *pivot=malloc(size);
   411   size_t trunc=TRUNC_nonaligned*size;
   412   assert(pivot!=0);
   413 
   414   first=(char*)base; last=first+(nmemb-1)*size;
   415 
   416   if ((size_t)(last-first)>=trunc) {
   417     char *ffirst=first, *llast=last;
   418     while (1) {
   419       /* Select pivot */
   420       { char * mid=first+size*((last-first)/size >> 1);
   421         Pivot(SWAP_nonaligned,size);
   422         memcpy(pivot,mid,size);
   423       }
   424       /* Partition. */
   425       Partition(SWAP_nonaligned,size);
   426       /* Prepare to recurse/iterate. */
   427       Recurse(trunc)
   428     }
   429   }
   430   PreInsertion(SWAP_nonaligned,TRUNC_nonaligned,size);
   431   Insertion(SWAP_nonaligned);
   432   free(pivot);
   433 }
   434 
   435 static void qsort_aligned(void *base, size_t nmemb, size_t size,
   436            int (*compare)(const void *, const void *)) {
   437 
   438   stack_entry stack[STACK_SIZE];
   439   int stacktop=0;
   440   char *first,*last;
   441   char *pivot=malloc(size);
   442   size_t trunc=TRUNC_aligned*size;
   443   assert(pivot!=0);
   444 
   445   first=(char*)base; last=first+(nmemb-1)*size;
   446 
   447   if ((size_t)(last-first)>=trunc) {
   448     char *ffirst=first,*llast=last;
   449     while (1) {
   450       /* Select pivot */
   451       { char * mid=first+size*((last-first)/size >> 1);
   452         Pivot(SWAP_aligned,size);
   453         memcpy(pivot,mid,size);
   454       }
   455       /* Partition. */
   456       Partition(SWAP_aligned,size);
   457       /* Prepare to recurse/iterate. */
   458       Recurse(trunc)
   459     }
   460   }
   461   PreInsertion(SWAP_aligned,TRUNC_aligned,size);
   462   Insertion(SWAP_aligned);
   463   free(pivot);
   464 }
   465 
   466 static void qsort_words(void *base, size_t nmemb,
   467            int (*compare)(const void *, const void *)) {
   468 
   469   stack_entry stack[STACK_SIZE];
   470   int stacktop=0;
   471   char *first,*last;
   472   char *pivot=malloc(WORD_BYTES);
   473   assert(pivot!=0);
   474 
   475   first=(char*)base; last=first+(nmemb-1)*WORD_BYTES;
   476 
   477   if (last-first>=TRUNC_words) {
   478     char *ffirst=first, *llast=last;
   479     while (1) {
   480 #ifdef DEBUG_QSORT
   481 fprintf(stderr,"Doing %d:%d: ",
   482         (first-(char*)base)/WORD_BYTES,
   483         (last-(char*)base)/WORD_BYTES);
   484 #endif
   485       /* Select pivot */
   486       { char * mid=first+WORD_BYTES*((last-first) / (2*WORD_BYTES));
   487         Pivot(SWAP_words,WORD_BYTES);
   488         *(int*)pivot=*(int*)mid;
   489 #ifdef DEBUG_QSORT
   490 fprintf(stderr,"pivot = %p = #%lu = %d\n", mid, (unsigned long)(((int*)mid)-((int*)base)), *(int*)mid);
   491 #endif
   492       }
   493       /* Partition. */
   494       Partition(SWAP_words,WORD_BYTES);
   495 #ifdef DEBUG_QSORT
   496 fprintf(stderr, "after partitioning first=#%lu last=#%lu\n", (first-(char*)base)/4lu, (last-(char*)base)/4lu);
   497 #endif
   498       /* Prepare to recurse/iterate. */
   499       Recurse(TRUNC_words)
   500     }
   501   }
   502   PreInsertion(SWAP_words,(TRUNC_words/WORD_BYTES),WORD_BYTES);
   503   /* Now do insertion sort. */
   504   last=((char*)base)+nmemb*WORD_BYTES;
   505   for (first=((char*)base)+WORD_BYTES;first!=last;first+=WORD_BYTES) {
   506     /* Find the right place for |first|. My apologies for var reuse */
   507     int *pl=(int*)(first-WORD_BYTES),*pr=(int*)first;
   508     *(int*)pivot=*(int*)first;
   509     for (;compare(pl,pivot)>0;pr=pl,--pl) {
   510       *pr=*pl; }
   511     if (pr!=(int*)first) *pr=*(int*)pivot;
   512   }
   513   free(pivot);
   514 }
   515 
   516 /* ---------------------------------------------------------------------- */
   517 
   518 extern void qsortG(void *base, size_t nmemb, size_t size,
   519            int (*compare)(const void *, const void *)) {
   520 
   521   if (nmemb<=1) return;
   522   if (((int)base|size)&(WORD_BYTES-1))
   523     qsort_nonaligned(base,nmemb,size,compare);
   524   else if (size!=WORD_BYTES)
   525     qsort_aligned(base,nmemb,size,compare);
   526   else
   527     qsort_words(base,nmemb,compare);
   528 }
   529 
   530 
   531 #endif /* HAVE_QSORT */
   532 
   533 /* vi: set ts=4 sw=4 expandtab: */
   534