src/stdlib/SDL_qsort.c
author Sam Lantinga
Fri, 05 Jul 2013 23:57:19 -0700
changeset 7351 668a3dc28361
parent 7003 eeaf77005c30
child 8093 b43765095a6f
permissions -rw-r--r--
Removed the inline functions from SDL_stdinc.h
Having the SDL functions inline is causing build issues, and in the case of malloc(), etc. causing malloc/free mismatches, if the application build environment differs from the SDL build environment.

In the interest of safety and consistency, the functions will always be in the SDL library and will only be redirected to the C library there, if they are available.

See the following threads on the SDL mailing list for the gruesome details:
* SDL_stdinc.h inlines problematic when application not compiled in exact same feature environment
* Error compiling program against SDL2 with -std=c++11 g++ flag
slouken@1330
     1
/* qsort.c
slouken@1330
     2
 * (c) 1998 Gareth McCaughan
slouken@1330
     3
 *
slouken@1330
     4
 * This is a drop-in replacement for the C library's |qsort()| routine.
slouken@1330
     5
 *
slouken@1330
     6
 * Features:
slouken@1330
     7
 *   - Median-of-three pivoting (and more)
slouken@1330
     8
 *   - Truncation and final polishing by a single insertion sort
slouken@1330
     9
 *   - Early truncation when no swaps needed in pivoting step
slouken@1330
    10
 *   - Explicit recursion, guaranteed not to overflow
slouken@1330
    11
 *   - A few little wrinkles stolen from the GNU |qsort()|.
slouken@1330
    12
 *   - separate code for non-aligned / aligned / word-size objects
slouken@1330
    13
 *
slouken@1330
    14
 * This code may be reproduced freely provided
slouken@1330
    15
 *   - this file is retained unaltered apart from minor
slouken@1330
    16
 *     changes for portability and efficiency
slouken@1330
    17
 *   - no changes are made to this comment
slouken@1330
    18
 *   - any changes that *are* made are clearly flagged
slouken@1330
    19
 *   - the _ID string below is altered by inserting, after
slouken@1330
    20
 *     the date, the string " altered" followed at your option
slouken@1330
    21
 *     by other material. (Exceptions: you may change the name
slouken@1330
    22
 *     of the exported routine without changing the ID string.
slouken@1330
    23
 *     You may change the values of the macros TRUNC_* and
slouken@1330
    24
 *     PIVOT_THRESHOLD without changing the ID string, provided
slouken@1330
    25
 *     they remain constants with TRUNC_nonaligned, TRUNC_aligned
slouken@1330
    26
 *     and TRUNC_words/WORD_BYTES between 8 and 24, and
slouken@1330
    27
 *     PIVOT_THRESHOLD between 32 and 200.)
slouken@1330
    28
 *
slouken@1330
    29
 * You may use it in anything you like; you may make money
slouken@1330
    30
 * out of it; you may distribute it in object form or as
slouken@1330
    31
 * part of an executable without including source code;
slouken@1330
    32
 * you don't have to credit me. (But it would be nice if
slouken@1330
    33
 * you did.)
slouken@1330
    34
 *
slouken@1330
    35
 * If you find problems with this code, or find ways of
slouken@1330
    36
 * making it significantly faster, please let me know!
slouken@1330
    37
 * My e-mail address, valid as of early 1998 and certainly
slouken@1330
    38
 * OK for at least the next 18 months, is
slouken@1330
    39
 *    gjm11@dpmms.cam.ac.uk
slouken@1330
    40
 * Thanks!
slouken@1330
    41
 *
slouken@1330
    42
 * Gareth McCaughan   Peterhouse   Cambridge   1998
slouken@1330
    43
 */
slouken@1402
    44
#include "SDL_config.h"
slouken@1330
    45
slouken@1330
    46
/*
slouken@1330
    47
#include <assert.h>
slouken@1330
    48
#include <stdlib.h>
slouken@1330
    49
#include <string.h>
slouken@1330
    50
*/
slouken@1354
    51
#include "SDL_stdinc.h"
icculus@6281
    52
#include "SDL_assert.h"
slouken@1330
    53
slouken@7351
    54
#if defined(HAVE_QSORT)
icculus@7003
    55
void
icculus@7003
    56
SDL_qsort(void *base, size_t nmemb, size_t size, int (*compare) (const void *, const void *))
icculus@7003
    57
{
slouken@7351
    58
    qsort(base, nmemb, size, compare);
icculus@7003
    59
}
icculus@7003
    60
#else
icculus@7003
    61
icculus@3616
    62
#ifdef assert
icculus@3616
    63
#undef assert
icculus@3616
    64
#endif
icculus@6281
    65
#define assert(X) SDL_assert(X)
icculus@3616
    66
#ifdef malloc
icculus@3616
    67
#undef malloc
icculus@3616
    68
#endif
slouken@1337
    69
#define malloc	SDL_malloc
icculus@3616
    70
#ifdef free
icculus@3616
    71
#undef free
icculus@3616
    72
#endif
slouken@1337
    73
#define free	SDL_free
icculus@3616
    74
#ifdef memcpy
icculus@3616
    75
#undef memcpy
icculus@3616
    76
#endif
slouken@1337
    77
#define memcpy	SDL_memcpy
icculus@3616
    78
#ifdef memmove
icculus@3616
    79
#undef memmove
icculus@3616
    80
#endif
slouken@1337
    81
#define memmove	SDL_memmove
icculus@3616
    82
#ifdef qsort
icculus@3616
    83
#undef qsort
icculus@3616
    84
#endif
slouken@1337
    85
#define qsort	SDL_qsort
slouken@1337
    86
slouken@3162
    87
static const char _ID[] = "<qsort.c gjm 1.12 1998-03-19>";
slouken@1330
    88
slouken@1330
    89
/* How many bytes are there per word? (Must be a power of 2,
slouken@1330
    90
 * and must in fact equal sizeof(int).)
slouken@1330
    91
 */
slouken@1330
    92
#define WORD_BYTES sizeof(int)
slouken@1330
    93
slouken@1330
    94
/* How big does our stack need to be? Answer: one entry per
slouken@1330
    95
 * bit in a |size_t|.
slouken@1330
    96
 */
slouken@1330
    97
#define STACK_SIZE (8*sizeof(size_t))
slouken@1330
    98
slouken@1330
    99
/* Different situations have slightly different requirements,
slouken@1330
   100
 * and we make life epsilon easier by using different truncation
slouken@1330
   101
 * points for the three different cases.
slouken@1330
   102
 * So far, I have tuned TRUNC_words and guessed that the same
slouken@1330
   103
 * value might work well for the other two cases. Of course
slouken@1330
   104
 * what works well on my machine might work badly on yours.
slouken@1330
   105
 */
slouken@1330
   106
#define TRUNC_nonaligned	12
slouken@1330
   107
#define TRUNC_aligned		12
slouken@1895
   108
#define TRUNC_words		12*WORD_BYTES   /* nb different meaning */
slouken@1330
   109
slouken@1330
   110
/* We use a simple pivoting algorithm for shortish sub-arrays
slouken@1330
   111
 * and a more complicated one for larger ones. The threshold
slouken@1330
   112
 * is PIVOT_THRESHOLD.
slouken@1330
   113
 */
slouken@1330
   114
#define PIVOT_THRESHOLD 40
slouken@1330
   115
slouken@1895
   116
typedef struct
slouken@1895
   117
{
slouken@1895
   118
    char *first;
slouken@1895
   119
    char *last;
slouken@1895
   120
} stack_entry;
slouken@1330
   121
#define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
slouken@1330
   122
#define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
slouken@1330
   123
#define doLeft {first=ffirst;llast=last;continue;}
slouken@1330
   124
#define doRight {ffirst=first;last=llast;continue;}
slouken@1330
   125
#define pop {if (--stacktop<0) break;\
slouken@1330
   126
  first=ffirst=stack[stacktop].first;\
slouken@1330
   127
  last=llast=stack[stacktop].last;\
slouken@1330
   128
  continue;}
slouken@1330
   129
slouken@1330
   130
/* Some comments on the implementation.
slouken@1330
   131
 * 1. When we finish partitioning the array into "low"
slouken@1330
   132
 *    and "high", we forget entirely about short subarrays,
slouken@1330
   133
 *    because they'll be done later by insertion sort.
slouken@1330
   134
 *    Doing lots of little insertion sorts might be a win
slouken@1330
   135
 *    on large datasets for locality-of-reference reasons,
slouken@1330
   136
 *    but it makes the code much nastier and increases
slouken@1330
   137
 *    bookkeeping overhead.
slouken@1330
   138
 * 2. We always save the shorter and get to work on the
slouken@1330
   139
 *    longer. This guarantees that every time we push
slouken@1330
   140
 *    an item onto the stack its size is <= 1/2 of that
slouken@1330
   141
 *    of its parent; so the stack can't need more than
slouken@1330
   142
 *    log_2(max-array-size) entries.
slouken@1330
   143
 * 3. We choose a pivot by looking at the first, last
slouken@1330
   144
 *    and middle elements. We arrange them into order
slouken@1330
   145
 *    because it's easy to do that in conjunction with
slouken@1330
   146
 *    choosing the pivot, and it makes things a little
slouken@1330
   147
 *    easier in the partitioning step. Anyway, the pivot
slouken@1330
   148
 *    is the middle of these three. It's still possible
slouken@1330
   149
 *    to construct datasets where the algorithm takes
slouken@1330
   150
 *    time of order n^2, but it simply never happens in
slouken@1330
   151
 *    practice.
slouken@1330
   152
 * 3' Newsflash: On further investigation I find that
slouken@1330
   153
 *    it's easy to construct datasets where median-of-3
slouken@1330
   154
 *    simply isn't good enough. So on large-ish subarrays
slouken@1330
   155
 *    we do a more sophisticated pivoting: we take three
slouken@1330
   156
 *    sets of 3 elements, find their medians, and then
slouken@1330
   157
 *    take the median of those.
slouken@1330
   158
 * 4. We copy the pivot element to a separate place
slouken@1330
   159
 *    because that way we can always do our comparisons
slouken@1330
   160
 *    directly against a pointer to that separate place,
slouken@1330
   161
 *    and don't have to wonder "did we move the pivot
slouken@1330
   162
 *    element?". This makes the inner loop better.
slouken@1330
   163
 * 5. It's possible to make the pivoting even more
slouken@1330
   164
 *    reliable by looking at more candidates when n
slouken@1330
   165
 *    is larger. (Taking this to its logical conclusion
slouken@1330
   166
 *    results in a variant of quicksort that doesn't
slouken@1330
   167
 *    have that n^2 worst case.) However, the overhead
slouken@1330
   168
 *    from the extra bookkeeping means that it's just
slouken@1330
   169
 *    not worth while.
slouken@1330
   170
 * 6. This is pretty clean and portable code. Here are
slouken@1330
   171
 *    all the potential portability pitfalls and problems
slouken@1330
   172
 *    I know of:
slouken@1330
   173
 *      - In one place (the insertion sort) I construct
slouken@1330
   174
 *        a pointer that points just past the end of the
slouken@1330
   175
 *        supplied array, and assume that (a) it won't
slouken@1330
   176
 *        compare equal to any pointer within the array,
slouken@1330
   177
 *        and (b) it will compare equal to a pointer
slouken@1330
   178
 *        obtained by stepping off the end of the array.
slouken@1330
   179
 *        These might fail on some segmented architectures.
slouken@1330
   180
 *      - I assume that there are 8 bits in a |char| when
slouken@1330
   181
 *        computing the size of stack needed. This would
slouken@1330
   182
 *        fail on machines with 9-bit or 16-bit bytes.
slouken@1330
   183
 *      - I assume that if |((int)base&(sizeof(int)-1))==0|
slouken@1330
   184
 *        and |(size&(sizeof(int)-1))==0| then it's safe to
slouken@1330
   185
 *        get at array elements via |int*|s, and that if
slouken@1330
   186
 *        actually |size==sizeof(int)| as well then it's
slouken@1330
   187
 *        safe to treat the elements as |int|s. This might
slouken@1330
   188
 *        fail on systems that convert pointers to integers
slouken@1330
   189
 *        in non-standard ways.
slouken@1330
   190
 *      - I assume that |8*sizeof(size_t)<=INT_MAX|. This
slouken@1330
   191
 *        would be false on a machine with 8-bit |char|s,
slouken@1330
   192
 *        16-bit |int|s and 4096-bit |size_t|s. :-)
slouken@1330
   193
 */
slouken@1330
   194
slouken@1330
   195
/* The recursion logic is the same in each case: */
slouken@1330
   196
#define Recurse(Trunc)				\
slouken@1330
   197
      { size_t l=last-ffirst,r=llast-first;	\
slouken@1330
   198
        if (l<Trunc) {				\
slouken@1330
   199
          if (r>=Trunc) doRight			\
slouken@1330
   200
          else pop				\
slouken@1330
   201
        }					\
slouken@1330
   202
        else if (l<=r) { pushLeft; doRight }	\
slouken@1330
   203
        else if (r>=Trunc) { pushRight; doLeft }\
slouken@1330
   204
        else doLeft				\
slouken@1330
   205
      }
slouken@1330
   206
slouken@1330
   207
/* and so is the pivoting logic: */
slouken@1330
   208
#define Pivot(swapper,sz)			\
slouken@1330
   209
  if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
slouken@1330
   210
  else {	\
slouken@1330
   211
    if (compare(first,mid)<0) {			\
slouken@1330
   212
      if (compare(mid,last)>0) {		\
slouken@1330
   213
        swapper(mid,last);			\
slouken@1330
   214
        if (compare(first,mid)>0) swapper(first,mid);\
slouken@1330
   215
      }						\
slouken@1330
   216
    }						\
slouken@1330
   217
    else {					\
slouken@1330
   218
      if (compare(mid,last)>0) swapper(first,last)\
slouken@1330
   219
      else {					\
slouken@1330
   220
        swapper(first,mid);			\
slouken@1330
   221
        if (compare(mid,last)>0) swapper(mid,last);\
slouken@1330
   222
      }						\
slouken@1330
   223
    }						\
slouken@1330
   224
    first+=sz; last-=sz;			\
slouken@1330
   225
  }
slouken@1330
   226
slouken@1330
   227
#ifdef DEBUG_QSORT
slouken@1330
   228
#include <stdio.h>
slouken@1330
   229
#endif
slouken@1330
   230
slouken@1330
   231
/* and so is the partitioning logic: */
slouken@1330
   232
#define Partition(swapper,sz) {			\
slouken@1330
   233
  int swapped=0;				\
slouken@1330
   234
  do {						\
slouken@1330
   235
    while (compare(first,pivot)<0) first+=sz;	\
slouken@1330
   236
    while (compare(pivot,last)<0) last-=sz;	\
slouken@1330
   237
    if (first<last) {				\
slouken@1330
   238
      swapper(first,last); swapped=1;		\
slouken@1330
   239
      first+=sz; last-=sz; }			\
slouken@1330
   240
    else if (first==last) { first+=sz; last-=sz; break; }\
slouken@1330
   241
  } while (first<=last);			\
slouken@1330
   242
  if (!swapped) pop				\
slouken@1330
   243
}
slouken@1330
   244
slouken@1330
   245
/* and so is the pre-insertion-sort operation of putting
slouken@1330
   246
 * the smallest element into place as a sentinel.
slouken@1330
   247
 * Doing this makes the inner loop nicer. I got this
slouken@1330
   248
 * idea from the GNU implementation of qsort().
slouken@1330
   249
 */
slouken@1330
   250
#define PreInsertion(swapper,limit,sz)		\
slouken@1330
   251
  first=base;					\
slouken@1330
   252
  last=first + (nmemb>limit ? limit : nmemb-1)*sz;\
slouken@1330
   253
  while (last!=base) {				\
slouken@1330
   254
    if (compare(first,last)>0) first=last;	\
slouken@1330
   255
    last-=sz; }					\
slouken@1330
   256
  if (first!=base) swapper(first,(char*)base);
slouken@1330
   257
slouken@1330
   258
/* and so is the insertion sort, in the first two cases: */
slouken@1330
   259
#define Insertion(swapper)			\
slouken@1330
   260
  last=((char*)base)+nmemb*size;		\
slouken@1330
   261
  for (first=((char*)base)+size;first!=last;first+=size) {	\
slouken@1330
   262
    char *test;					\
slouken@1330
   263
    /* Find the right place for |first|.	\
slouken@1330
   264
     * My apologies for var reuse. */		\
slouken@1330
   265
    for (test=first-size;compare(test,first)>0;test-=size) ;	\
slouken@1330
   266
    test+=size;					\
slouken@1330
   267
    if (test!=first) {				\
slouken@1330
   268
      /* Shift everything in [test,first)	\
slouken@1330
   269
       * up by one, and place |first|		\
slouken@1330
   270
       * where |test| is. */			\
slouken@1337
   271
      memcpy(pivot,first,size);			\
slouken@1337
   272
      memmove(test+size,test,first-test);	\
slouken@1337
   273
      memcpy(test,pivot,size);			\
slouken@1330
   274
    }						\
slouken@1330
   275
  }
slouken@1330
   276
slouken@1330
   277
#define SWAP_nonaligned(a,b) { \
slouken@1330
   278
  register char *aa=(a),*bb=(b); \
slouken@1330
   279
  register size_t sz=size; \
slouken@1330
   280
  do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
slouken@1330
   281
slouken@1330
   282
#define SWAP_aligned(a,b) { \
slouken@1330
   283
  register int *aa=(int*)(a),*bb=(int*)(b); \
slouken@1330
   284
  register size_t sz=size; \
slouken@1330
   285
  do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
slouken@1330
   286
slouken@1330
   287
#define SWAP_words(a,b) { \
slouken@1330
   288
  register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
slouken@1330
   289
slouken@1330
   290
/* ---------------------------------------------------------------------- */
slouken@1330
   291
slouken@1895
   292
static char *
slouken@1895
   293
pivot_big(char *first, char *mid, char *last, size_t size,
slouken@1895
   294
          int compare(const void *, const void *))
slouken@1895
   295
{
slouken@1895
   296
    size_t d = (((last - first) / size) >> 3) * size;
slouken@1895
   297
    char *m1, *m2, *m3;
slouken@1895
   298
    {
slouken@1895
   299
        char *a = first, *b = first + d, *c = first + 2 * d;
slouken@1330
   300
#ifdef DEBUG_QSORT
slouken@1895
   301
        fprintf(stderr, "< %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
slouken@1330
   302
#endif
slouken@1895
   303
        m1 = compare(a, b) < 0 ?
slouken@1895
   304
            (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
slouken@1895
   305
            : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
slouken@1895
   306
    }
slouken@1895
   307
    {
slouken@1895
   308
        char *a = mid - d, *b = mid, *c = mid + d;
slouken@1330
   309
#ifdef DEBUG_QSORT
slouken@1895
   310
        fprintf(stderr, ". %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
slouken@1330
   311
#endif
slouken@1895
   312
        m2 = compare(a, b) < 0 ?
slouken@1895
   313
            (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
slouken@1895
   314
            : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
slouken@1895
   315
    }
slouken@1895
   316
    {
slouken@1895
   317
        char *a = last - 2 * d, *b = last - d, *c = last;
slouken@1330
   318
#ifdef DEBUG_QSORT
slouken@1895
   319
        fprintf(stderr, "> %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
slouken@1330
   320
#endif
slouken@1895
   321
        m3 = compare(a, b) < 0 ?
slouken@1895
   322
            (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
slouken@1895
   323
            : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
slouken@1895
   324
    }
slouken@1330
   325
#ifdef DEBUG_QSORT
slouken@1895
   326
    fprintf(stderr, "-> %d %d %d\n", *(int *) m1, *(int *) m2, *(int *) m3);
slouken@1330
   327
#endif
slouken@1895
   328
    return compare(m1, m2) < 0 ?
slouken@1895
   329
        (compare(m2, m3) < 0 ? m2 : (compare(m1, m3) < 0 ? m3 : m1))
slouken@1895
   330
        : (compare(m1, m3) < 0 ? m1 : (compare(m2, m3) < 0 ? m3 : m2));
slouken@1330
   331
}
slouken@1330
   332
slouken@1330
   333
/* ---------------------------------------------------------------------- */
slouken@1330
   334
slouken@1895
   335
static void
slouken@1895
   336
qsort_nonaligned(void *base, size_t nmemb, size_t size,
slouken@1895
   337
                 int (*compare) (const void *, const void *))
slouken@1895
   338
{
slouken@1330
   339
slouken@1895
   340
    stack_entry stack[STACK_SIZE];
slouken@1895
   341
    int stacktop = 0;
slouken@1895
   342
    char *first, *last;
slouken@1895
   343
    char *pivot = malloc(size);
slouken@1895
   344
    size_t trunc = TRUNC_nonaligned * size;
slouken@1895
   345
    assert(pivot != 0);
slouken@1330
   346
slouken@1895
   347
    first = (char *) base;
slouken@1895
   348
    last = first + (nmemb - 1) * size;
slouken@1330
   349
slouken@1895
   350
    if ((size_t) (last - first) > trunc) {
slouken@1895
   351
        char *ffirst = first, *llast = last;
slouken@1895
   352
        while (1) {
slouken@1895
   353
            /* Select pivot */
slouken@1895
   354
            {
slouken@1895
   355
                char *mid = first + size * ((last - first) / size >> 1);
slouken@1895
   356
                Pivot(SWAP_nonaligned, size);
slouken@1895
   357
                memcpy(pivot, mid, size);
slouken@1895
   358
            }
slouken@1895
   359
            /* Partition. */
slouken@1895
   360
            Partition(SWAP_nonaligned, size);
slouken@1895
   361
            /* Prepare to recurse/iterate. */
slouken@1895
   362
        Recurse(trunc)}
slouken@1330
   363
    }
slouken@1895
   364
    PreInsertion(SWAP_nonaligned, TRUNC_nonaligned, size);
slouken@1895
   365
    Insertion(SWAP_nonaligned);
slouken@1895
   366
    free(pivot);
slouken@1330
   367
}
slouken@1330
   368
slouken@1895
   369
static void
slouken@1895
   370
qsort_aligned(void *base, size_t nmemb, size_t size,
slouken@1895
   371
              int (*compare) (const void *, const void *))
slouken@1895
   372
{
slouken@1330
   373
slouken@1895
   374
    stack_entry stack[STACK_SIZE];
slouken@1895
   375
    int stacktop = 0;
slouken@1895
   376
    char *first, *last;
slouken@1895
   377
    char *pivot = malloc(size);
slouken@1895
   378
    size_t trunc = TRUNC_aligned * size;
slouken@1895
   379
    assert(pivot != 0);
slouken@1330
   380
slouken@1895
   381
    first = (char *) base;
slouken@1895
   382
    last = first + (nmemb - 1) * size;
slouken@1330
   383
slouken@1895
   384
    if ((size_t) (last - first) > trunc) {
slouken@1895
   385
        char *ffirst = first, *llast = last;
slouken@1895
   386
        while (1) {
slouken@1895
   387
            /* Select pivot */
slouken@1895
   388
            {
slouken@1895
   389
                char *mid = first + size * ((last - first) / size >> 1);
slouken@1895
   390
                Pivot(SWAP_aligned, size);
slouken@1895
   391
                memcpy(pivot, mid, size);
slouken@1895
   392
            }
slouken@1895
   393
            /* Partition. */
slouken@1895
   394
            Partition(SWAP_aligned, size);
slouken@1895
   395
            /* Prepare to recurse/iterate. */
slouken@1895
   396
        Recurse(trunc)}
slouken@1330
   397
    }
slouken@1895
   398
    PreInsertion(SWAP_aligned, TRUNC_aligned, size);
slouken@1895
   399
    Insertion(SWAP_aligned);
slouken@1895
   400
    free(pivot);
slouken@1330
   401
}
slouken@1330
   402
slouken@1895
   403
static void
slouken@1895
   404
qsort_words(void *base, size_t nmemb,
slouken@1895
   405
            int (*compare) (const void *, const void *))
slouken@1895
   406
{
slouken@1330
   407
slouken@1895
   408
    stack_entry stack[STACK_SIZE];
slouken@1895
   409
    int stacktop = 0;
slouken@1895
   410
    char *first, *last;
slouken@1895
   411
    char *pivot = malloc(WORD_BYTES);
slouken@1895
   412
    assert(pivot != 0);
slouken@1330
   413
slouken@1895
   414
    first = (char *) base;
slouken@1895
   415
    last = first + (nmemb - 1) * WORD_BYTES;
slouken@1330
   416
slouken@1895
   417
    if (last - first > TRUNC_words) {
slouken@1895
   418
        char *ffirst = first, *llast = last;
slouken@1895
   419
        while (1) {
slouken@1330
   420
#ifdef DEBUG_QSORT
slouken@1895
   421
            fprintf(stderr, "Doing %d:%d: ",
slouken@1895
   422
                    (first - (char *) base) / WORD_BYTES,
slouken@1895
   423
                    (last - (char *) base) / WORD_BYTES);
slouken@1330
   424
#endif
slouken@1895
   425
            /* Select pivot */
slouken@1895
   426
            {
slouken@1895
   427
                char *mid =
slouken@1895
   428
                    first + WORD_BYTES * ((last - first) / (2 * WORD_BYTES));
slouken@1895
   429
                Pivot(SWAP_words, WORD_BYTES);
slouken@1895
   430
                *(int *) pivot = *(int *) mid;
slouken@1895
   431
            }
slouken@1330
   432
#ifdef DEBUG_QSORT
slouken@1895
   433
            fprintf(stderr, "pivot=%d\n", *(int *) pivot);
slouken@1330
   434
#endif
slouken@1895
   435
            /* Partition. */
slouken@1895
   436
            Partition(SWAP_words, WORD_BYTES);
slouken@1895
   437
            /* Prepare to recurse/iterate. */
slouken@1895
   438
        Recurse(TRUNC_words)}
slouken@1330
   439
    }
slouken@1895
   440
    PreInsertion(SWAP_words, (TRUNC_words / WORD_BYTES), WORD_BYTES);
slouken@1895
   441
    /* Now do insertion sort. */
slouken@1895
   442
    last = ((char *) base) + nmemb * WORD_BYTES;
slouken@1895
   443
    for (first = ((char *) base) + WORD_BYTES; first != last;
slouken@1895
   444
         first += WORD_BYTES) {
slouken@1895
   445
        /* Find the right place for |first|. My apologies for var reuse */
slouken@1895
   446
        int *pl = (int *) (first - WORD_BYTES), *pr = (int *) first;
slouken@1895
   447
        *(int *) pivot = *(int *) first;
slouken@1895
   448
        for (; compare(pl, pivot) > 0; pr = pl, --pl) {
slouken@1895
   449
            *pr = *pl;
slouken@1895
   450
        }
slouken@1895
   451
        if (pr != (int *) first)
slouken@1895
   452
            *pr = *(int *) pivot;
slouken@1895
   453
    }
slouken@1895
   454
    free(pivot);
slouken@1330
   455
}
slouken@1330
   456
slouken@1330
   457
/* ---------------------------------------------------------------------- */
slouken@1330
   458
slouken@1895
   459
void
slouken@1895
   460
qsort(void *base, size_t nmemb, size_t size,
slouken@1895
   461
      int (*compare) (const void *, const void *))
slouken@1895
   462
{
slouken@1330
   463
slouken@1895
   464
    if (nmemb <= 1)
slouken@1895
   465
        return;
slouken@1895
   466
    if (((uintptr_t) base | size) & (WORD_BYTES - 1))
slouken@1895
   467
        qsort_nonaligned(base, nmemb, size, compare);
slouken@1895
   468
    else if (size != WORD_BYTES)
slouken@1895
   469
        qsort_aligned(base, nmemb, size, compare);
slouken@1895
   470
    else
slouken@1895
   471
        qsort_words(base, nmemb, compare);
slouken@1330
   472
}
slouken@1330
   473
icculus@7003
   474
#endif /* !SDL_qsort */
icculus@7003
   475
slouken@1895
   476
/* vi: set ts=4 sw=4 expandtab: */