src/stdlib/SDL_qsort.c
author Ozkan Sezer
Sun, 25 Oct 2020 17:05:40 +0300
branchSDL-1.2
changeset 14203 a3455c981bf9
parent 13077 cd46eb772bee
permissions -rw-r--r--
os2/SDL_sysjoystick.c: move struct _transaxes oot of struct joystick_hwdata
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2016 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 
    22 #include "SDL_config.h"
    23 
    24 #ifndef HAVE_QSORT
    25 #include "SDL_stdinc.h"
    26 
    27 #ifdef assert
    28 #undef assert
    29 #endif
    30 #define assert(X)
    31 #ifdef malloc
    32 #undef malloc
    33 #endif
    34 #define malloc SDL_malloc
    35 #ifdef free
    36 #undef free
    37 #endif
    38 #define free SDL_free
    39 #ifdef memcpy
    40 #undef memcpy
    41 #endif
    42 #define memcpy SDL_memcpy
    43 #ifdef memmove
    44 #undef memmove
    45 #endif
    46 #define memmove SDL_memmove
    47 #ifdef qsortG
    48 #undef qsortG
    49 #endif
    50 #define qsortG SDL_qsort
    51 
    52 /*
    53 This code came from Gareth McCaughan, under the zlib license.
    54 Specifically this: https://www.mccaughan.org.uk/software/qsort.c-1.15
    55 
    56 Everything below this comment until the HAVE_QSORT #endif was from Gareth
    57 (any minor changes will be noted inline).
    58 
    59 Thank you to Gareth for relicensing this code under the zlib license for our
    60 benefit!
    61 
    62 --ryan.
    63 */
    64 
    65 /* This is a drop-in replacement for the C library's |qsort()| routine.
    66  *
    67  * It is intended for use where you know or suspect that your
    68  * platform's qsort is bad. If that isn't the case, then you
    69  * should probably use the qsort your system gives you in preference
    70  * to mine -- it will likely have been tested and tuned better.
    71  *
    72  * Features:
    73  *   - Median-of-three pivoting (and more)
    74  *   - Truncation and final polishing by a single insertion sort
    75  *   - Early truncation when no swaps needed in pivoting step
    76  *   - Explicit recursion, guaranteed not to overflow
    77  *   - A few little wrinkles stolen from the GNU |qsort()|.
    78  *     (For the avoidance of doubt, no code was stolen, only
    79  *     broad ideas.)
    80  *   - separate code for non-aligned / aligned / word-size objects
    81  *
    82  * Earlier releases of this code used an idiosyncratic licence
    83  * I wrote myself, because I'm an idiot. The code is now released
    84  * under the "zlib/libpng licence"; you will find the actual
    85  * terms in the next comment. I request (but do not require)
    86  * that if you make any changes beyond the name of the exported
    87  * routine and reasonable tweaks to the TRUNC_* and
    88  * PIVOT_THRESHOLD values, you modify the _ID string so as
    89  * to make it clear that you have changed the code.
    90  *
    91  * If you find problems with this code, or find ways of
    92  * making it significantly faster, please let me know!
    93  * My e-mail address, valid as of early 2016 and for the
    94  * foreseeable future, is
    95  *    gareth.mccaughan@pobox.com
    96  * Thanks!
    97  *
    98  * Gareth McCaughan
    99  */
   100 
   101 /* Copyright (c) 1998-2016 Gareth McCaughan
   102  *
   103  * This software is provided 'as-is', without any express or implied
   104  * warranty. In no event will the authors be held liable for any
   105  * damages arising from the use of this software.
   106  *
   107  * Permission is granted to anyone to use this software for any purpose,
   108  * including commercial applications, and to alter it and redistribute it
   109  * freely, subject to the following restrictions:
   110  *
   111  * 1. The origin of this software must not be misrepresented;
   112  *    you must not claim that you wrote the original software.
   113  *    If you use this software in a product, an acknowledgment
   114  *    in the product documentation would be appreciated but
   115  *    is not required.
   116  *
   117  * 2. Altered source versions must be plainly marked as such,
   118  *    and must not be misrepresented as being the original software.
   119  *
   120  * 3. This notice may not be removed or altered from any source
   121  *    distribution.
   122  */
   123 
   124 /* Revision history since release:
   125  *   1998-03-19 v1.12 First release I have any records of.
   126  *   2007-09-02 v1.13 Fix bug kindly reported by Dan Bodoh
   127  *                    (premature termination of recursion).
   128  *                    Add a few clarifying comments.
   129  *                    Minor improvements to debug output.
   130  *   2016-02-21 v1.14 Replace licence with 2-clause BSD,
   131  *                    and clarify a couple of things in
   132  *                    comments. No code changes.
   133  *   2016-03-10 v1.15 Fix bug kindly reported by Ryan Gordon
   134  *                    (pre-insertion-sort messed up).
   135  *                    Disable DEBUG_QSORT by default.
   136  *                    Tweak comments very slightly.
   137  */
   138 
   139 /* BEGIN SDL CHANGE ... commented this out with an #if 0 block. --ryan. */
   140 #if 0
   141 #include <assert.h>
   142 #include <stdlib.h>
   143 #include <string.h>
   144 
   145 #undef DEBUG_QSORT
   146 
   147 static char _ID[]="<qsort.c gjm 1.15 2016-03-10>";
   148 #endif
   149 /* END SDL CHANGE ... commented this out with an #if 0 block. --ryan. */
   150 
   151 /* How many bytes are there per word? (Must be a power of 2,
   152  * and must in fact equal sizeof(int).)
   153  */
   154 #define WORD_BYTES sizeof(int)
   155 
   156 /* How big does our stack need to be? Answer: one entry per
   157  * bit in a |size_t|.
   158  */
   159 #define STACK_SIZE (8*sizeof(size_t))
   160 
   161 /* Different situations have slightly different requirements,
   162  * and we make life epsilon easier by using different truncation
   163  * points for the three different cases.
   164  * So far, I have tuned TRUNC_words and guessed that the same
   165  * value might work well for the other two cases. Of course
   166  * what works well on my machine might work badly on yours.
   167  */
   168 #define TRUNC_nonaligned	12
   169 #define TRUNC_aligned		12
   170 #define TRUNC_words		12*WORD_BYTES	/* nb different meaning */
   171 
   172 /* We use a simple pivoting algorithm for shortish sub-arrays
   173  * and a more complicated one for larger ones. The threshold
   174  * is PIVOT_THRESHOLD.
   175  */
   176 #define PIVOT_THRESHOLD 40
   177 
   178 typedef struct { char * first; char * last; } stack_entry;
   179 #define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
   180 #define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
   181 #define doLeft {first=ffirst;llast=last;continue;}
   182 #define doRight {ffirst=first;last=llast;continue;}
   183 #define pop {if (--stacktop<0) break;\
   184   first=ffirst=stack[stacktop].first;\
   185   last=llast=stack[stacktop].last;\
   186   continue;}
   187 
   188 /* Some comments on the implementation.
   189  * 1. When we finish partitioning the array into "low"
   190  *    and "high", we forget entirely about short subarrays,
   191  *    because they'll be done later by insertion sort.
   192  *    Doing lots of little insertion sorts might be a win
   193  *    on large datasets for locality-of-reference reasons,
   194  *    but it makes the code much nastier and increases
   195  *    bookkeeping overhead.
   196  * 2. We always save the shorter and get to work on the
   197  *    longer. This guarantees that every time we push
   198  *    an item onto the stack its size is <= 1/2 of that
   199  *    of its parent; so the stack can't need more than
   200  *    log_2(max-array-size) entries.
   201  * 3. We choose a pivot by looking at the first, last
   202  *    and middle elements. We arrange them into order
   203  *    because it's easy to do that in conjunction with
   204  *    choosing the pivot, and it makes things a little
   205  *    easier in the partitioning step. Anyway, the pivot
   206  *    is the middle of these three. It's still possible
   207  *    to construct datasets where the algorithm takes
   208  *    time of order n^2, but it simply never happens in
   209  *    practice.
   210  * 3' Newsflash: On further investigation I find that
   211  *    it's easy to construct datasets where median-of-3
   212  *    simply isn't good enough. So on large-ish subarrays
   213  *    we do a more sophisticated pivoting: we take three
   214  *    sets of 3 elements, find their medians, and then
   215  *    take the median of those.
   216  * 4. We copy the pivot element to a separate place
   217  *    because that way we can always do our comparisons
   218  *    directly against a pointer to that separate place,
   219  *    and don't have to wonder "did we move the pivot
   220  *    element?". This makes the inner loop better.
   221  * 5. It's possible to make the pivoting even more
   222  *    reliable by looking at more candidates when n
   223  *    is larger. (Taking this to its logical conclusion
   224  *    results in a variant of quicksort that doesn't
   225  *    have that n^2 worst case.) However, the overhead
   226  *    from the extra bookkeeping means that it's just
   227  *    not worth while.
   228  * 6. This is pretty clean and portable code. Here are
   229  *    all the potential portability pitfalls and problems
   230  *    I know of:
   231  *      - In one place (the insertion sort) I construct
   232  *        a pointer that points just past the end of the
   233  *        supplied array, and assume that (a) it won't
   234  *        compare equal to any pointer within the array,
   235  *        and (b) it will compare equal to a pointer
   236  *        obtained by stepping off the end of the array.
   237  *        These might fail on some segmented architectures.
   238  *      - I assume that there are 8 bits in a |char| when
   239  *        computing the size of stack needed. This would
   240  *        fail on machines with 9-bit or 16-bit bytes.
   241  *      - I assume that if |((int)base&(sizeof(int)-1))==0|
   242  *        and |(size&(sizeof(int)-1))==0| then it's safe to
   243  *        get at array elements via |int*|s, and that if
   244  *        actually |size==sizeof(int)| as well then it's
   245  *        safe to treat the elements as |int|s. This might
   246  *        fail on systems that convert pointers to integers
   247  *        in non-standard ways.
   248  *      - I assume that |8*sizeof(size_t)<=INT_MAX|. This
   249  *        would be false on a machine with 8-bit |char|s,
   250  *        16-bit |int|s and 4096-bit |size_t|s. :-)
   251  */
   252 
   253 /* The recursion logic is the same in each case.
   254  * We keep chopping up until we reach subarrays of size
   255  * strictly less than Trunc; we leave these unsorted. */
   256 #define Recurse(Trunc)				\
   257       { size_t l=last-ffirst,r=llast-first;	\
   258         if (l<Trunc) {				\
   259           if (r>=Trunc) doRight			\
   260           else pop				\
   261         }					\
   262         else if (l<=r) { pushLeft; doRight }	\
   263         else if (r>=Trunc) { pushRight; doLeft }\
   264         else doLeft				\
   265       }
   266 
   267 /* and so is the pivoting logic (note: last is inclusive): */
   268 #define Pivot(swapper,sz)			\
   269   if (last-first>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
   270   else {	\
   271     if (compare(first,mid)<0) {			\
   272       if (compare(mid,last)>0) {		\
   273         swapper(mid,last);			\
   274         if (compare(first,mid)>0) swapper(first,mid);\
   275       }						\
   276     }						\
   277     else {					\
   278       if (compare(mid,last)>0) swapper(first,last)\
   279       else {					\
   280         swapper(first,mid);			\
   281         if (compare(mid,last)>0) swapper(mid,last);\
   282       }						\
   283     }						\
   284     first+=sz; last-=sz;			\
   285   }
   286 
   287 #ifdef DEBUG_QSORT
   288 #include <stdio.h>
   289 #endif
   290 
   291 /* and so is the partitioning logic: */
   292 #define Partition(swapper,sz) {			\
   293   do {						\
   294     while (compare(first,pivot)<0) first+=sz;	\
   295     while (compare(pivot,last)<0) last-=sz;	\
   296     if (first<last) {				\
   297       swapper(first,last);			\
   298       first+=sz; last-=sz; }			\
   299     else if (first==last) { first+=sz; last-=sz; break; }\
   300   } while (first<=last);			\
   301 }
   302 
   303 /* and so is the pre-insertion-sort operation of putting
   304  * the smallest element into place as a sentinel.
   305  * Doing this makes the inner loop nicer. I got this
   306  * idea from the GNU implementation of qsort().
   307  * We find the smallest element from the first |nmemb|,
   308  * or the first |limit|, whichever is smaller;
   309  * therefore we must have ensured that the globally smallest
   310  * element is in the first |limit| (because our
   311  * quicksort recursion bottoms out only once we
   312  * reach subarrays smaller than |limit|).
   313  */
   314 #define PreInsertion(swapper,limit,sz)		\
   315   first=base;					\
   316   last=first + ((nmemb>limit ? limit : nmemb)-1)*sz;\
   317   while (last!=base) {				\
   318     if (compare(first,last)>0) first=last;	\
   319     last-=sz; }					\
   320   if (first!=base) swapper(first,(char*)base);
   321 
   322 /* and so is the insertion sort, in the first two cases: */
   323 #define Insertion(swapper)			\
   324   last=((char*)base)+nmemb*size;		\
   325   for (first=((char*)base)+size;first!=last;first+=size) {	\
   326     char *test;					\
   327     /* Find the right place for |first|.	\
   328      * My apologies for var reuse. */		\
   329     for (test=first-size;compare(test,first)>0;test-=size) ;	\
   330     test+=size;					\
   331     if (test!=first) {				\
   332       /* Shift everything in [test,first)	\
   333        * up by one, and place |first|		\
   334        * where |test| is. */			\
   335       memcpy(pivot,first,size);			\
   336       memmove(test+size,test,first-test);	\
   337       memcpy(test,pivot,size);			\
   338     }						\
   339   }
   340 
   341 #define SWAP_nonaligned(a,b) { \
   342   register char *aa=(a),*bb=(b); \
   343   register size_t sz=size; \
   344   do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
   345 
   346 #define SWAP_aligned(a,b) { \
   347   register int *aa=(int*)(a),*bb=(int*)(b); \
   348   register size_t sz=size; \
   349   do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
   350 
   351 #define SWAP_words(a,b) { \
   352   register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
   353 
   354 /* ---------------------------------------------------------------------- */
   355 
   356 static char * pivot_big(char *first, char *mid, char *last, size_t size,
   357                         int compare(const void *, const void *)) {
   358   int d=(((last-first)/size)>>3)*size;
   359 #ifdef DEBUG_QSORT
   360 fprintf(stderr, "pivot_big: first=%p last=%p size=%lu n=%lu\n", first, (unsigned long)last, size, (unsigned long)((last-first+1)/size));
   361 #endif
   362   char *m1,*m2,*m3;
   363   { char *a=first, *b=first+d, *c=first+2*d;
   364 #ifdef DEBUG_QSORT
   365 fprintf(stderr,"< %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
   366 #endif
   367     m1 = compare(a,b)<0 ?
   368            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
   369          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
   370   }
   371   { char *a=mid-d, *b=mid, *c=mid+d;
   372 #ifdef DEBUG_QSORT
   373 fprintf(stderr,". %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
   374 #endif
   375     m2 = compare(a,b)<0 ?
   376            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
   377          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
   378   }
   379   { char *a=last-2*d, *b=last-d, *c=last;
   380 #ifdef DEBUG_QSORT
   381 fprintf(stderr,"> %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
   382 #endif
   383     m3 = compare(a,b)<0 ?
   384            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
   385          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
   386   }
   387 #ifdef DEBUG_QSORT
   388 fprintf(stderr,"-> %d %d %d @ %p %p %p\n",*(int*)m1,*(int*)m2,*(int*)m3, m1,m2,m3);
   389 #endif
   390   return compare(m1,m2)<0 ?
   391            (compare(m2,m3)<0 ? m2 : (compare(m1,m3)<0 ? m3 : m1))
   392          : (compare(m1,m3)<0 ? m1 : (compare(m2,m3)<0 ? m3 : m2));
   393 }
   394 
   395 /* ---------------------------------------------------------------------- */
   396 
   397 static void qsort_nonaligned(void *base, size_t nmemb, size_t size,
   398            int (*compare)(const void *, const void *)) {
   399 
   400   stack_entry stack[STACK_SIZE];
   401   int stacktop=0;
   402   char *first,*last;
   403   char *pivot=malloc(size);
   404   size_t trunc=TRUNC_nonaligned*size;
   405   assert(pivot!=0);
   406 
   407   first=(char*)base; last=first+(nmemb-1)*size;
   408 
   409   if (last-first>=trunc) {
   410     char *ffirst=first, *llast=last;
   411     while (1) {
   412       /* Select pivot */
   413       { char * mid=first+size*((last-first)/size >> 1);
   414         Pivot(SWAP_nonaligned,size);
   415         memcpy(pivot,mid,size);
   416       }
   417       /* Partition. */
   418       Partition(SWAP_nonaligned,size);
   419       /* Prepare to recurse/iterate. */
   420       Recurse(trunc)
   421     }
   422   }
   423   PreInsertion(SWAP_nonaligned,TRUNC_nonaligned,size);
   424   Insertion(SWAP_nonaligned);
   425   free(pivot);
   426 }
   427 
   428 static void qsort_aligned(void *base, size_t nmemb, size_t size,
   429            int (*compare)(const void *, const void *)) {
   430 
   431   stack_entry stack[STACK_SIZE];
   432   int stacktop=0;
   433   char *first,*last;
   434   char *pivot=malloc(size);
   435   size_t trunc=TRUNC_aligned*size;
   436   assert(pivot!=0);
   437 
   438   first=(char*)base; last=first+(nmemb-1)*size;
   439 
   440   if (last-first>=trunc) {
   441     char *ffirst=first,*llast=last;
   442     while (1) {
   443       /* Select pivot */
   444       { char * mid=first+size*((last-first)/size >> 1);
   445         Pivot(SWAP_aligned,size);
   446         memcpy(pivot,mid,size);
   447       }
   448       /* Partition. */
   449       Partition(SWAP_aligned,size);
   450       /* Prepare to recurse/iterate. */
   451       Recurse(trunc)
   452     }
   453   }
   454   PreInsertion(SWAP_aligned,TRUNC_aligned,size);
   455   Insertion(SWAP_aligned);
   456   free(pivot);
   457 }
   458 
   459 static void qsort_words(void *base, size_t nmemb,
   460            int (*compare)(const void *, const void *)) {
   461 
   462   stack_entry stack[STACK_SIZE];
   463   int stacktop=0;
   464   char *first,*last;
   465   char *pivot=malloc(WORD_BYTES);
   466   assert(pivot!=0);
   467 
   468   first=(char*)base; last=first+(nmemb-1)*WORD_BYTES;
   469 
   470   if (last-first>=TRUNC_words) {
   471     char *ffirst=first, *llast=last;
   472     while (1) {
   473 #ifdef DEBUG_QSORT
   474 fprintf(stderr,"Doing %d:%d: ",
   475         (first-(char*)base)/WORD_BYTES,
   476         (last-(char*)base)/WORD_BYTES);
   477 #endif
   478       /* Select pivot */
   479       { char * mid=first+WORD_BYTES*((last-first) / (2*WORD_BYTES));
   480         Pivot(SWAP_words,WORD_BYTES);
   481         *(int*)pivot=*(int*)mid;
   482 #ifdef DEBUG_QSORT
   483 fprintf(stderr,"pivot = %p = #%lu = %d\n", mid, (unsigned long)(((int*)mid)-((int*)base)), *(int*)mid);
   484 #endif
   485       }
   486       /* Partition. */
   487       Partition(SWAP_words,WORD_BYTES);
   488 #ifdef DEBUG_QSORT
   489 fprintf(stderr, "after partitioning first=#%lu last=#%lu\n", (first-(char*)base)/4lu, (last-(char*)base)/4lu);
   490 #endif
   491       /* Prepare to recurse/iterate. */
   492       Recurse(TRUNC_words)
   493     }
   494   }
   495   PreInsertion(SWAP_words,TRUNC_words/WORD_BYTES,WORD_BYTES);
   496   /* Now do insertion sort. */
   497   last=((char*)base)+nmemb*WORD_BYTES;
   498   for (first=((char*)base)+WORD_BYTES;first!=last;first+=WORD_BYTES) {
   499     /* Find the right place for |first|. My apologies for var reuse */
   500     int *pl=(int*)(first-WORD_BYTES),*pr=(int*)first;
   501     *(int*)pivot=*(int*)first;
   502     for (;compare(pl,pivot)>0;pr=pl,--pl) {
   503       *pr=*pl; }
   504     if (pr!=(int*)first) *pr=*(int*)pivot;
   505   }
   506   free(pivot);
   507 }
   508 
   509 /* ---------------------------------------------------------------------- */
   510 
   511 extern void qsortG(void *base, size_t nmemb, size_t size,
   512            int (*compare)(const void *, const void *)) {
   513 
   514   if (nmemb<=1) return;
   515   if (((int)base|size)&(WORD_BYTES-1))
   516     qsort_nonaligned(base,nmemb,size,compare);
   517   else if (size!=WORD_BYTES)
   518     qsort_aligned(base,nmemb,size,compare);
   519   else
   520     qsort_words(base,nmemb,compare);
   521 }
   522 
   523 #endif /* ifndef HAVE_QSORT */
   524 
   525 /* vi: set ts=4 sw=4 expandtab: */
   526