src/stdlib/SDL_qsort.c
author Ryan C. Gordon
Fri, 15 Mar 2013 01:01:20 -0400
changeset 7003 eeaf77005c30
parent 6281 e46d6f4b469e
child 7351 668a3dc28361
permissions -rw-r--r--
Improvements to stdlib.

All SDL_* functions are always available as real symbols, so you can always
link against them as a stable ABI. By default, however, all the things that
might have dithered down to macros in your application are now force-inlined,
to give you the same effect as before and theoretically better performance,
but still solve the classic macro problems.

Elsewhere, we provide real functions for these things that simply wrap the
inline functions, in case one needs to have a real function available.

Also: this exposed bugs: SDL_abs() does something different if you had the
macro vs the libc function, SDL_memcpy() returns a void* in the function
but not the macro, etc.
slouken@1330
     1
/* qsort.c
slouken@1330
     2
 * (c) 1998 Gareth McCaughan
slouken@1330
     3
 *
slouken@1330
     4
 * This is a drop-in replacement for the C library's |qsort()| routine.
slouken@1330
     5
 *
slouken@1330
     6
 * Features:
slouken@1330
     7
 *   - Median-of-three pivoting (and more)
slouken@1330
     8
 *   - Truncation and final polishing by a single insertion sort
slouken@1330
     9
 *   - Early truncation when no swaps needed in pivoting step
slouken@1330
    10
 *   - Explicit recursion, guaranteed not to overflow
slouken@1330
    11
 *   - A few little wrinkles stolen from the GNU |qsort()|.
slouken@1330
    12
 *   - separate code for non-aligned / aligned / word-size objects
slouken@1330
    13
 *
slouken@1330
    14
 * This code may be reproduced freely provided
slouken@1330
    15
 *   - this file is retained unaltered apart from minor
slouken@1330
    16
 *     changes for portability and efficiency
slouken@1330
    17
 *   - no changes are made to this comment
slouken@1330
    18
 *   - any changes that *are* made are clearly flagged
slouken@1330
    19
 *   - the _ID string below is altered by inserting, after
slouken@1330
    20
 *     the date, the string " altered" followed at your option
slouken@1330
    21
 *     by other material. (Exceptions: you may change the name
slouken@1330
    22
 *     of the exported routine without changing the ID string.
slouken@1330
    23
 *     You may change the values of the macros TRUNC_* and
slouken@1330
    24
 *     PIVOT_THRESHOLD without changing the ID string, provided
slouken@1330
    25
 *     they remain constants with TRUNC_nonaligned, TRUNC_aligned
slouken@1330
    26
 *     and TRUNC_words/WORD_BYTES between 8 and 24, and
slouken@1330
    27
 *     PIVOT_THRESHOLD between 32 and 200.)
slouken@1330
    28
 *
slouken@1330
    29
 * You may use it in anything you like; you may make money
slouken@1330
    30
 * out of it; you may distribute it in object form or as
slouken@1330
    31
 * part of an executable without including source code;
slouken@1330
    32
 * you don't have to credit me. (But it would be nice if
slouken@1330
    33
 * you did.)
slouken@1330
    34
 *
slouken@1330
    35
 * If you find problems with this code, or find ways of
slouken@1330
    36
 * making it significantly faster, please let me know!
slouken@1330
    37
 * My e-mail address, valid as of early 1998 and certainly
slouken@1330
    38
 * OK for at least the next 18 months, is
slouken@1330
    39
 *    gjm11@dpmms.cam.ac.uk
slouken@1330
    40
 * Thanks!
slouken@1330
    41
 *
slouken@1330
    42
 * Gareth McCaughan   Peterhouse   Cambridge   1998
slouken@1330
    43
 */
slouken@1402
    44
#include "SDL_config.h"
slouken@1330
    45
slouken@1330
    46
/*
slouken@1330
    47
#include <assert.h>
slouken@1330
    48
#include <stdlib.h>
slouken@1330
    49
#include <string.h>
slouken@1330
    50
*/
slouken@1354
    51
#include "SDL_stdinc.h"
icculus@6281
    52
#include "SDL_assert.h"
slouken@1330
    53
icculus@7003
    54
#ifdef SDL_qsort
icculus@7003
    55
#undef SDL_qsort
icculus@7003
    56
void
icculus@7003
    57
SDL_qsort(void *base, size_t nmemb, size_t size, int (*compare) (const void *, const void *))
icculus@7003
    58
{
icculus@7003
    59
    SDL_qsort_inline(base, nmemb, size, compare);
icculus@7003
    60
}
icculus@7003
    61
#else
icculus@7003
    62
icculus@3616
    63
#ifdef assert
icculus@3616
    64
#undef assert
icculus@3616
    65
#endif
icculus@6281
    66
#define assert(X) SDL_assert(X)
icculus@3616
    67
#ifdef malloc
icculus@3616
    68
#undef malloc
icculus@3616
    69
#endif
slouken@1337
    70
#define malloc	SDL_malloc
icculus@3616
    71
#ifdef free
icculus@3616
    72
#undef free
icculus@3616
    73
#endif
slouken@1337
    74
#define free	SDL_free
icculus@3616
    75
#ifdef memcpy
icculus@3616
    76
#undef memcpy
icculus@3616
    77
#endif
slouken@1337
    78
#define memcpy	SDL_memcpy
icculus@3616
    79
#ifdef memmove
icculus@3616
    80
#undef memmove
icculus@3616
    81
#endif
slouken@1337
    82
#define memmove	SDL_memmove
icculus@3616
    83
#ifdef qsort
icculus@3616
    84
#undef qsort
icculus@3616
    85
#endif
slouken@1337
    86
#define qsort	SDL_qsort
slouken@1337
    87
slouken@3162
    88
static const char _ID[] = "<qsort.c gjm 1.12 1998-03-19>";
slouken@1330
    89
slouken@1330
    90
/* How many bytes are there per word? (Must be a power of 2,
slouken@1330
    91
 * and must in fact equal sizeof(int).)
slouken@1330
    92
 */
slouken@1330
    93
#define WORD_BYTES sizeof(int)
slouken@1330
    94
slouken@1330
    95
/* How big does our stack need to be? Answer: one entry per
slouken@1330
    96
 * bit in a |size_t|.
slouken@1330
    97
 */
slouken@1330
    98
#define STACK_SIZE (8*sizeof(size_t))
slouken@1330
    99
slouken@1330
   100
/* Different situations have slightly different requirements,
slouken@1330
   101
 * and we make life epsilon easier by using different truncation
slouken@1330
   102
 * points for the three different cases.
slouken@1330
   103
 * So far, I have tuned TRUNC_words and guessed that the same
slouken@1330
   104
 * value might work well for the other two cases. Of course
slouken@1330
   105
 * what works well on my machine might work badly on yours.
slouken@1330
   106
 */
slouken@1330
   107
#define TRUNC_nonaligned	12
slouken@1330
   108
#define TRUNC_aligned		12
slouken@1895
   109
#define TRUNC_words		12*WORD_BYTES   /* nb different meaning */
slouken@1330
   110
slouken@1330
   111
/* We use a simple pivoting algorithm for shortish sub-arrays
slouken@1330
   112
 * and a more complicated one for larger ones. The threshold
slouken@1330
   113
 * is PIVOT_THRESHOLD.
slouken@1330
   114
 */
slouken@1330
   115
#define PIVOT_THRESHOLD 40
slouken@1330
   116
slouken@1895
   117
typedef struct
slouken@1895
   118
{
slouken@1895
   119
    char *first;
slouken@1895
   120
    char *last;
slouken@1895
   121
} stack_entry;
slouken@1330
   122
#define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
slouken@1330
   123
#define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
slouken@1330
   124
#define doLeft {first=ffirst;llast=last;continue;}
slouken@1330
   125
#define doRight {ffirst=first;last=llast;continue;}
slouken@1330
   126
#define pop {if (--stacktop<0) break;\
slouken@1330
   127
  first=ffirst=stack[stacktop].first;\
slouken@1330
   128
  last=llast=stack[stacktop].last;\
slouken@1330
   129
  continue;}
slouken@1330
   130
slouken@1330
   131
/* Some comments on the implementation.
slouken@1330
   132
 * 1. When we finish partitioning the array into "low"
slouken@1330
   133
 *    and "high", we forget entirely about short subarrays,
slouken@1330
   134
 *    because they'll be done later by insertion sort.
slouken@1330
   135
 *    Doing lots of little insertion sorts might be a win
slouken@1330
   136
 *    on large datasets for locality-of-reference reasons,
slouken@1330
   137
 *    but it makes the code much nastier and increases
slouken@1330
   138
 *    bookkeeping overhead.
slouken@1330
   139
 * 2. We always save the shorter and get to work on the
slouken@1330
   140
 *    longer. This guarantees that every time we push
slouken@1330
   141
 *    an item onto the stack its size is <= 1/2 of that
slouken@1330
   142
 *    of its parent; so the stack can't need more than
slouken@1330
   143
 *    log_2(max-array-size) entries.
slouken@1330
   144
 * 3. We choose a pivot by looking at the first, last
slouken@1330
   145
 *    and middle elements. We arrange them into order
slouken@1330
   146
 *    because it's easy to do that in conjunction with
slouken@1330
   147
 *    choosing the pivot, and it makes things a little
slouken@1330
   148
 *    easier in the partitioning step. Anyway, the pivot
slouken@1330
   149
 *    is the middle of these three. It's still possible
slouken@1330
   150
 *    to construct datasets where the algorithm takes
slouken@1330
   151
 *    time of order n^2, but it simply never happens in
slouken@1330
   152
 *    practice.
slouken@1330
   153
 * 3' Newsflash: On further investigation I find that
slouken@1330
   154
 *    it's easy to construct datasets where median-of-3
slouken@1330
   155
 *    simply isn't good enough. So on large-ish subarrays
slouken@1330
   156
 *    we do a more sophisticated pivoting: we take three
slouken@1330
   157
 *    sets of 3 elements, find their medians, and then
slouken@1330
   158
 *    take the median of those.
slouken@1330
   159
 * 4. We copy the pivot element to a separate place
slouken@1330
   160
 *    because that way we can always do our comparisons
slouken@1330
   161
 *    directly against a pointer to that separate place,
slouken@1330
   162
 *    and don't have to wonder "did we move the pivot
slouken@1330
   163
 *    element?". This makes the inner loop better.
slouken@1330
   164
 * 5. It's possible to make the pivoting even more
slouken@1330
   165
 *    reliable by looking at more candidates when n
slouken@1330
   166
 *    is larger. (Taking this to its logical conclusion
slouken@1330
   167
 *    results in a variant of quicksort that doesn't
slouken@1330
   168
 *    have that n^2 worst case.) However, the overhead
slouken@1330
   169
 *    from the extra bookkeeping means that it's just
slouken@1330
   170
 *    not worth while.
slouken@1330
   171
 * 6. This is pretty clean and portable code. Here are
slouken@1330
   172
 *    all the potential portability pitfalls and problems
slouken@1330
   173
 *    I know of:
slouken@1330
   174
 *      - In one place (the insertion sort) I construct
slouken@1330
   175
 *        a pointer that points just past the end of the
slouken@1330
   176
 *        supplied array, and assume that (a) it won't
slouken@1330
   177
 *        compare equal to any pointer within the array,
slouken@1330
   178
 *        and (b) it will compare equal to a pointer
slouken@1330
   179
 *        obtained by stepping off the end of the array.
slouken@1330
   180
 *        These might fail on some segmented architectures.
slouken@1330
   181
 *      - I assume that there are 8 bits in a |char| when
slouken@1330
   182
 *        computing the size of stack needed. This would
slouken@1330
   183
 *        fail on machines with 9-bit or 16-bit bytes.
slouken@1330
   184
 *      - I assume that if |((int)base&(sizeof(int)-1))==0|
slouken@1330
   185
 *        and |(size&(sizeof(int)-1))==0| then it's safe to
slouken@1330
   186
 *        get at array elements via |int*|s, and that if
slouken@1330
   187
 *        actually |size==sizeof(int)| as well then it's
slouken@1330
   188
 *        safe to treat the elements as |int|s. This might
slouken@1330
   189
 *        fail on systems that convert pointers to integers
slouken@1330
   190
 *        in non-standard ways.
slouken@1330
   191
 *      - I assume that |8*sizeof(size_t)<=INT_MAX|. This
slouken@1330
   192
 *        would be false on a machine with 8-bit |char|s,
slouken@1330
   193
 *        16-bit |int|s and 4096-bit |size_t|s. :-)
slouken@1330
   194
 */
slouken@1330
   195
slouken@1330
   196
/* The recursion logic is the same in each case: */
slouken@1330
   197
#define Recurse(Trunc)				\
slouken@1330
   198
      { size_t l=last-ffirst,r=llast-first;	\
slouken@1330
   199
        if (l<Trunc) {				\
slouken@1330
   200
          if (r>=Trunc) doRight			\
slouken@1330
   201
          else pop				\
slouken@1330
   202
        }					\
slouken@1330
   203
        else if (l<=r) { pushLeft; doRight }	\
slouken@1330
   204
        else if (r>=Trunc) { pushRight; doLeft }\
slouken@1330
   205
        else doLeft				\
slouken@1330
   206
      }
slouken@1330
   207
slouken@1330
   208
/* and so is the pivoting logic: */
slouken@1330
   209
#define Pivot(swapper,sz)			\
slouken@1330
   210
  if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
slouken@1330
   211
  else {	\
slouken@1330
   212
    if (compare(first,mid)<0) {			\
slouken@1330
   213
      if (compare(mid,last)>0) {		\
slouken@1330
   214
        swapper(mid,last);			\
slouken@1330
   215
        if (compare(first,mid)>0) swapper(first,mid);\
slouken@1330
   216
      }						\
slouken@1330
   217
    }						\
slouken@1330
   218
    else {					\
slouken@1330
   219
      if (compare(mid,last)>0) swapper(first,last)\
slouken@1330
   220
      else {					\
slouken@1330
   221
        swapper(first,mid);			\
slouken@1330
   222
        if (compare(mid,last)>0) swapper(mid,last);\
slouken@1330
   223
      }						\
slouken@1330
   224
    }						\
slouken@1330
   225
    first+=sz; last-=sz;			\
slouken@1330
   226
  }
slouken@1330
   227
slouken@1330
   228
#ifdef DEBUG_QSORT
slouken@1330
   229
#include <stdio.h>
slouken@1330
   230
#endif
slouken@1330
   231
slouken@1330
   232
/* and so is the partitioning logic: */
slouken@1330
   233
#define Partition(swapper,sz) {			\
slouken@1330
   234
  int swapped=0;				\
slouken@1330
   235
  do {						\
slouken@1330
   236
    while (compare(first,pivot)<0) first+=sz;	\
slouken@1330
   237
    while (compare(pivot,last)<0) last-=sz;	\
slouken@1330
   238
    if (first<last) {				\
slouken@1330
   239
      swapper(first,last); swapped=1;		\
slouken@1330
   240
      first+=sz; last-=sz; }			\
slouken@1330
   241
    else if (first==last) { first+=sz; last-=sz; break; }\
slouken@1330
   242
  } while (first<=last);			\
slouken@1330
   243
  if (!swapped) pop				\
slouken@1330
   244
}
slouken@1330
   245
slouken@1330
   246
/* and so is the pre-insertion-sort operation of putting
slouken@1330
   247
 * the smallest element into place as a sentinel.
slouken@1330
   248
 * Doing this makes the inner loop nicer. I got this
slouken@1330
   249
 * idea from the GNU implementation of qsort().
slouken@1330
   250
 */
slouken@1330
   251
#define PreInsertion(swapper,limit,sz)		\
slouken@1330
   252
  first=base;					\
slouken@1330
   253
  last=first + (nmemb>limit ? limit : nmemb-1)*sz;\
slouken@1330
   254
  while (last!=base) {				\
slouken@1330
   255
    if (compare(first,last)>0) first=last;	\
slouken@1330
   256
    last-=sz; }					\
slouken@1330
   257
  if (first!=base) swapper(first,(char*)base);
slouken@1330
   258
slouken@1330
   259
/* and so is the insertion sort, in the first two cases: */
slouken@1330
   260
#define Insertion(swapper)			\
slouken@1330
   261
  last=((char*)base)+nmemb*size;		\
slouken@1330
   262
  for (first=((char*)base)+size;first!=last;first+=size) {	\
slouken@1330
   263
    char *test;					\
slouken@1330
   264
    /* Find the right place for |first|.	\
slouken@1330
   265
     * My apologies for var reuse. */		\
slouken@1330
   266
    for (test=first-size;compare(test,first)>0;test-=size) ;	\
slouken@1330
   267
    test+=size;					\
slouken@1330
   268
    if (test!=first) {				\
slouken@1330
   269
      /* Shift everything in [test,first)	\
slouken@1330
   270
       * up by one, and place |first|		\
slouken@1330
   271
       * where |test| is. */			\
slouken@1337
   272
      memcpy(pivot,first,size);			\
slouken@1337
   273
      memmove(test+size,test,first-test);	\
slouken@1337
   274
      memcpy(test,pivot,size);			\
slouken@1330
   275
    }						\
slouken@1330
   276
  }
slouken@1330
   277
slouken@1330
   278
#define SWAP_nonaligned(a,b) { \
slouken@1330
   279
  register char *aa=(a),*bb=(b); \
slouken@1330
   280
  register size_t sz=size; \
slouken@1330
   281
  do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
slouken@1330
   282
slouken@1330
   283
#define SWAP_aligned(a,b) { \
slouken@1330
   284
  register int *aa=(int*)(a),*bb=(int*)(b); \
slouken@1330
   285
  register size_t sz=size; \
slouken@1330
   286
  do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
slouken@1330
   287
slouken@1330
   288
#define SWAP_words(a,b) { \
slouken@1330
   289
  register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
slouken@1330
   290
slouken@1330
   291
/* ---------------------------------------------------------------------- */
slouken@1330
   292
slouken@1895
   293
static char *
slouken@1895
   294
pivot_big(char *first, char *mid, char *last, size_t size,
slouken@1895
   295
          int compare(const void *, const void *))
slouken@1895
   296
{
slouken@1895
   297
    size_t d = (((last - first) / size) >> 3) * size;
slouken@1895
   298
    char *m1, *m2, *m3;
slouken@1895
   299
    {
slouken@1895
   300
        char *a = first, *b = first + d, *c = first + 2 * d;
slouken@1330
   301
#ifdef DEBUG_QSORT
slouken@1895
   302
        fprintf(stderr, "< %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
slouken@1330
   303
#endif
slouken@1895
   304
        m1 = compare(a, b) < 0 ?
slouken@1895
   305
            (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
slouken@1895
   306
            : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
slouken@1895
   307
    }
slouken@1895
   308
    {
slouken@1895
   309
        char *a = mid - d, *b = mid, *c = mid + d;
slouken@1330
   310
#ifdef DEBUG_QSORT
slouken@1895
   311
        fprintf(stderr, ". %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
slouken@1330
   312
#endif
slouken@1895
   313
        m2 = compare(a, b) < 0 ?
slouken@1895
   314
            (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
slouken@1895
   315
            : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
slouken@1895
   316
    }
slouken@1895
   317
    {
slouken@1895
   318
        char *a = last - 2 * d, *b = last - d, *c = last;
slouken@1330
   319
#ifdef DEBUG_QSORT
slouken@1895
   320
        fprintf(stderr, "> %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
slouken@1330
   321
#endif
slouken@1895
   322
        m3 = compare(a, b) < 0 ?
slouken@1895
   323
            (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
slouken@1895
   324
            : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
slouken@1895
   325
    }
slouken@1330
   326
#ifdef DEBUG_QSORT
slouken@1895
   327
    fprintf(stderr, "-> %d %d %d\n", *(int *) m1, *(int *) m2, *(int *) m3);
slouken@1330
   328
#endif
slouken@1895
   329
    return compare(m1, m2) < 0 ?
slouken@1895
   330
        (compare(m2, m3) < 0 ? m2 : (compare(m1, m3) < 0 ? m3 : m1))
slouken@1895
   331
        : (compare(m1, m3) < 0 ? m1 : (compare(m2, m3) < 0 ? m3 : m2));
slouken@1330
   332
}
slouken@1330
   333
slouken@1330
   334
/* ---------------------------------------------------------------------- */
slouken@1330
   335
slouken@1895
   336
static void
slouken@1895
   337
qsort_nonaligned(void *base, size_t nmemb, size_t size,
slouken@1895
   338
                 int (*compare) (const void *, const void *))
slouken@1895
   339
{
slouken@1330
   340
slouken@1895
   341
    stack_entry stack[STACK_SIZE];
slouken@1895
   342
    int stacktop = 0;
slouken@1895
   343
    char *first, *last;
slouken@1895
   344
    char *pivot = malloc(size);
slouken@1895
   345
    size_t trunc = TRUNC_nonaligned * size;
slouken@1895
   346
    assert(pivot != 0);
slouken@1330
   347
slouken@1895
   348
    first = (char *) base;
slouken@1895
   349
    last = first + (nmemb - 1) * size;
slouken@1330
   350
slouken@1895
   351
    if ((size_t) (last - first) > trunc) {
slouken@1895
   352
        char *ffirst = first, *llast = last;
slouken@1895
   353
        while (1) {
slouken@1895
   354
            /* Select pivot */
slouken@1895
   355
            {
slouken@1895
   356
                char *mid = first + size * ((last - first) / size >> 1);
slouken@1895
   357
                Pivot(SWAP_nonaligned, size);
slouken@1895
   358
                memcpy(pivot, mid, size);
slouken@1895
   359
            }
slouken@1895
   360
            /* Partition. */
slouken@1895
   361
            Partition(SWAP_nonaligned, size);
slouken@1895
   362
            /* Prepare to recurse/iterate. */
slouken@1895
   363
        Recurse(trunc)}
slouken@1330
   364
    }
slouken@1895
   365
    PreInsertion(SWAP_nonaligned, TRUNC_nonaligned, size);
slouken@1895
   366
    Insertion(SWAP_nonaligned);
slouken@1895
   367
    free(pivot);
slouken@1330
   368
}
slouken@1330
   369
slouken@1895
   370
static void
slouken@1895
   371
qsort_aligned(void *base, size_t nmemb, size_t size,
slouken@1895
   372
              int (*compare) (const void *, const void *))
slouken@1895
   373
{
slouken@1330
   374
slouken@1895
   375
    stack_entry stack[STACK_SIZE];
slouken@1895
   376
    int stacktop = 0;
slouken@1895
   377
    char *first, *last;
slouken@1895
   378
    char *pivot = malloc(size);
slouken@1895
   379
    size_t trunc = TRUNC_aligned * size;
slouken@1895
   380
    assert(pivot != 0);
slouken@1330
   381
slouken@1895
   382
    first = (char *) base;
slouken@1895
   383
    last = first + (nmemb - 1) * size;
slouken@1330
   384
slouken@1895
   385
    if ((size_t) (last - first) > trunc) {
slouken@1895
   386
        char *ffirst = first, *llast = last;
slouken@1895
   387
        while (1) {
slouken@1895
   388
            /* Select pivot */
slouken@1895
   389
            {
slouken@1895
   390
                char *mid = first + size * ((last - first) / size >> 1);
slouken@1895
   391
                Pivot(SWAP_aligned, size);
slouken@1895
   392
                memcpy(pivot, mid, size);
slouken@1895
   393
            }
slouken@1895
   394
            /* Partition. */
slouken@1895
   395
            Partition(SWAP_aligned, size);
slouken@1895
   396
            /* Prepare to recurse/iterate. */
slouken@1895
   397
        Recurse(trunc)}
slouken@1330
   398
    }
slouken@1895
   399
    PreInsertion(SWAP_aligned, TRUNC_aligned, size);
slouken@1895
   400
    Insertion(SWAP_aligned);
slouken@1895
   401
    free(pivot);
slouken@1330
   402
}
slouken@1330
   403
slouken@1895
   404
static void
slouken@1895
   405
qsort_words(void *base, size_t nmemb,
slouken@1895
   406
            int (*compare) (const void *, const void *))
slouken@1895
   407
{
slouken@1330
   408
slouken@1895
   409
    stack_entry stack[STACK_SIZE];
slouken@1895
   410
    int stacktop = 0;
slouken@1895
   411
    char *first, *last;
slouken@1895
   412
    char *pivot = malloc(WORD_BYTES);
slouken@1895
   413
    assert(pivot != 0);
slouken@1330
   414
slouken@1895
   415
    first = (char *) base;
slouken@1895
   416
    last = first + (nmemb - 1) * WORD_BYTES;
slouken@1330
   417
slouken@1895
   418
    if (last - first > TRUNC_words) {
slouken@1895
   419
        char *ffirst = first, *llast = last;
slouken@1895
   420
        while (1) {
slouken@1330
   421
#ifdef DEBUG_QSORT
slouken@1895
   422
            fprintf(stderr, "Doing %d:%d: ",
slouken@1895
   423
                    (first - (char *) base) / WORD_BYTES,
slouken@1895
   424
                    (last - (char *) base) / WORD_BYTES);
slouken@1330
   425
#endif
slouken@1895
   426
            /* Select pivot */
slouken@1895
   427
            {
slouken@1895
   428
                char *mid =
slouken@1895
   429
                    first + WORD_BYTES * ((last - first) / (2 * WORD_BYTES));
slouken@1895
   430
                Pivot(SWAP_words, WORD_BYTES);
slouken@1895
   431
                *(int *) pivot = *(int *) mid;
slouken@1895
   432
            }
slouken@1330
   433
#ifdef DEBUG_QSORT
slouken@1895
   434
            fprintf(stderr, "pivot=%d\n", *(int *) pivot);
slouken@1330
   435
#endif
slouken@1895
   436
            /* Partition. */
slouken@1895
   437
            Partition(SWAP_words, WORD_BYTES);
slouken@1895
   438
            /* Prepare to recurse/iterate. */
slouken@1895
   439
        Recurse(TRUNC_words)}
slouken@1330
   440
    }
slouken@1895
   441
    PreInsertion(SWAP_words, (TRUNC_words / WORD_BYTES), WORD_BYTES);
slouken@1895
   442
    /* Now do insertion sort. */
slouken@1895
   443
    last = ((char *) base) + nmemb * WORD_BYTES;
slouken@1895
   444
    for (first = ((char *) base) + WORD_BYTES; first != last;
slouken@1895
   445
         first += WORD_BYTES) {
slouken@1895
   446
        /* Find the right place for |first|. My apologies for var reuse */
slouken@1895
   447
        int *pl = (int *) (first - WORD_BYTES), *pr = (int *) first;
slouken@1895
   448
        *(int *) pivot = *(int *) first;
slouken@1895
   449
        for (; compare(pl, pivot) > 0; pr = pl, --pl) {
slouken@1895
   450
            *pr = *pl;
slouken@1895
   451
        }
slouken@1895
   452
        if (pr != (int *) first)
slouken@1895
   453
            *pr = *(int *) pivot;
slouken@1895
   454
    }
slouken@1895
   455
    free(pivot);
slouken@1330
   456
}
slouken@1330
   457
slouken@1330
   458
/* ---------------------------------------------------------------------- */
slouken@1330
   459
slouken@1895
   460
void
slouken@1895
   461
qsort(void *base, size_t nmemb, size_t size,
slouken@1895
   462
      int (*compare) (const void *, const void *))
slouken@1895
   463
{
slouken@1330
   464
slouken@1895
   465
    if (nmemb <= 1)
slouken@1895
   466
        return;
slouken@1895
   467
    if (((uintptr_t) base | size) & (WORD_BYTES - 1))
slouken@1895
   468
        qsort_nonaligned(base, nmemb, size, compare);
slouken@1895
   469
    else if (size != WORD_BYTES)
slouken@1895
   470
        qsort_aligned(base, nmemb, size, compare);
slouken@1895
   471
    else
slouken@1895
   472
        qsort_words(base, nmemb, compare);
slouken@1330
   473
}
slouken@1330
   474
icculus@7003
   475
#endif /* !SDL_qsort */
icculus@7003
   476
slouken@1895
   477
/* vi: set ts=4 sw=4 expandtab: */