src/stdlib/SDL_qsort.c
author Sam Lantinga <slouken@libsdl.org>
Sun, 01 Jan 2017 18:33:28 -0800
changeset 10737 3406a0f8b041
parent 10599 ade40eb390dc
child 11416 7e7d8a125d6a
permissions -rw-r--r--
Updated copyright for 2017
icculus@10085
     1
/*
icculus@10085
     2
  Simple DirectMedia Layer
slouken@10737
     3
  Copyright (C) 1997-2017 Sam Lantinga <slouken@libsdl.org>
icculus@10085
     4
icculus@10085
     5
  This software is provided 'as-is', without any express or implied
icculus@10085
     6
  warranty.  In no event will the authors be held liable for any damages
icculus@10085
     7
  arising from the use of this software.
icculus@10085
     8
icculus@10085
     9
  Permission is granted to anyone to use this software for any purpose,
icculus@10085
    10
  including commercial applications, and to alter it and redistribute it
icculus@10085
    11
  freely, subject to the following restrictions:
icculus@10085
    12
icculus@10085
    13
  1. The origin of this software must not be misrepresented; you must not
icculus@10085
    14
     claim that you wrote the original software. If you use this software
icculus@10085
    15
     in a product, an acknowledgment in the product documentation would be
icculus@10085
    16
     appreciated but is not required.
icculus@10085
    17
  2. Altered source versions must be plainly marked as such, and must not be
icculus@10085
    18
     misrepresented as being the original software.
icculus@10085
    19
  3. This notice may not be removed or altered from any source distribution.
icculus@10085
    20
*/
icculus@10085
    21
icculus@10085
    22
#if defined(__clang_analyzer__) && !defined(SDL_DISABLE_ANALYZE_MACROS)
icculus@10085
    23
#define SDL_DISABLE_ANALYZE_MACROS 1
icculus@10085
    24
#endif
icculus@10085
    25
icculus@8093
    26
#include "../SDL_internal.h"
slouken@1330
    27
slouken@1354
    28
#include "SDL_stdinc.h"
icculus@6281
    29
#include "SDL_assert.h"
slouken@1330
    30
slouken@7351
    31
#if defined(HAVE_QSORT)
icculus@7003
    32
void
icculus@7003
    33
SDL_qsort(void *base, size_t nmemb, size_t size, int (*compare) (const void *, const void *))
icculus@7003
    34
{
slouken@7351
    35
    qsort(base, nmemb, size, compare);
icculus@7003
    36
}
icculus@10085
    37
icculus@7003
    38
#else
icculus@7003
    39
icculus@10085
    40
#ifdef assert
icculus@10085
    41
#undef assert
slouken@1330
    42
#endif
icculus@10085
    43
#define assert SDL_assert
icculus@10085
    44
#ifdef malloc
icculus@10085
    45
#undef malloc
icculus@10068
    46
#endif
icculus@10085
    47
#define malloc SDL_malloc
icculus@10085
    48
#ifdef free
icculus@10085
    49
#undef free
icculus@10068
    50
#endif
icculus@10085
    51
#define free SDL_free
icculus@10085
    52
#ifdef memcpy
icculus@10085
    53
#undef memcpy
icculus@10068
    54
#endif
icculus@10085
    55
#define memcpy SDL_memcpy
icculus@10085
    56
#ifdef memmove
icculus@10085
    57
#undef memmove
icculus@10069
    58
#endif
icculus@10085
    59
#define memmove SDL_memmove
icculus@10085
    60
#ifdef qsortG
icculus@10085
    61
#undef qsortG
icculus@10085
    62
#endif
icculus@10085
    63
#define qsortG SDL_qsort
icculus@10068
    64
icculus@10068
    65
/*
icculus@10085
    66
This code came from Gareth McCaughan, under the zlib license.
icculus@10085
    67
Specifically this: https://www.mccaughan.org.uk/software/qsort.c-1.14
icculus@10068
    68
icculus@10085
    69
Everything below this comment until the HAVE_QSORT #endif was from Gareth
icculus@10085
    70
(any minor changes will be noted inline).
icculus@10085
    71
icculus@10085
    72
Thank you to Gareth for relicensing this code under the zlib license for our
icculus@10085
    73
benefit!
icculus@10085
    74
icculus@10068
    75
--ryan.
icculus@10068
    76
*/
icculus@10068
    77
icculus@10085
    78
/* This is a drop-in replacement for the C library's |qsort()| routine.
icculus@10085
    79
 *
icculus@10085
    80
 * It is intended for use where you know or suspect that your
icculus@10085
    81
 * platform's qsort is bad. If that isn't the case, then you
icculus@10085
    82
 * should probably use the qsort your system gives you in preference
icculus@10085
    83
 * to mine -- it will likely have been tested and tuned better.
icculus@10085
    84
 *
icculus@10085
    85
 * Features:
icculus@10085
    86
 *   - Median-of-three pivoting (and more)
icculus@10085
    87
 *   - Truncation and final polishing by a single insertion sort
icculus@10085
    88
 *   - Early truncation when no swaps needed in pivoting step
icculus@10085
    89
 *   - Explicit recursion, guaranteed not to overflow
icculus@10085
    90
 *   - A few little wrinkles stolen from the GNU |qsort()|.
icculus@10085
    91
 *     (For the avoidance of doubt, no code was stolen, only
icculus@10085
    92
 *     broad ideas.)
icculus@10085
    93
 *   - separate code for non-aligned / aligned / word-size objects
icculus@10085
    94
 *
icculus@10085
    95
 * Earlier releases of this code used an idiosyncratic licence
icculus@10085
    96
 * I wrote myself, because I'm an idiot. The code is now released
icculus@10085
    97
 * under the "zlib/libpng licence"; you will find the actual
icculus@10085
    98
 * terms in the next comment. I request (but do not require)
icculus@10085
    99
 * that if you make any changes beyond the name of the exported
icculus@10085
   100
 * routine and reasonable tweaks to the TRUNC_* and
icculus@10085
   101
 * PIVOT_THRESHOLD values, you modify the _ID string so as
icculus@10085
   102
 * to make it clear that you have changed the code.
icculus@10085
   103
 *
icculus@10085
   104
 * If you find problems with this code, or find ways of
icculus@10085
   105
 * making it significantly faster, please let me know!
icculus@10085
   106
 * My e-mail address, valid as of early 2016 and for the
icculus@10085
   107
 * foreseeable future, is
icculus@10085
   108
 *    gareth.mccaughan@pobox.com
icculus@10085
   109
 * Thanks!
icculus@10085
   110
 *
icculus@10085
   111
 * Gareth McCaughan
icculus@10085
   112
 */
icculus@10068
   113
icculus@10085
   114
/* Copyright (c) 1998-2016 Gareth McCaughan
icculus@10085
   115
 *
icculus@10085
   116
 * This software is provided 'as-is', without any express or implied
icculus@10085
   117
 * warranty. In no event will the authors be held liable for any
icculus@10085
   118
 * damages arising from the use of this software.
icculus@10085
   119
 *
icculus@10085
   120
 * Permission is granted to anyone to use this software for any purpose,
icculus@10085
   121
 * including commercial applications, and to alter it and redistribute it
icculus@10085
   122
 * freely, subject to the following restrictions:
icculus@10085
   123
 *
icculus@10085
   124
 * 1. The origin of this software must not be misrepresented;
icculus@10085
   125
 *    you must not claim that you wrote the original software.
icculus@10085
   126
 *    If you use this software in a product, an acknowledgment
icculus@10085
   127
 *    in the product documentation would be appreciated but
icculus@10085
   128
 *    is not required.
icculus@10085
   129
 *
icculus@10085
   130
 * 2. Altered source versions must be plainly marked as such,
icculus@10085
   131
 *    and must not be misrepresented as being the original software.
icculus@10085
   132
 *
icculus@10085
   133
 * 3. This notice may not be removed or altered from any source
icculus@10085
   134
 *    distribution.
icculus@10085
   135
 */
icculus@10068
   136
icculus@10085
   137
/* Revision history since release:
icculus@10085
   138
 *   1998-03-19 v1.12 First release I have any records of.
icculus@10085
   139
 *   2007-09-02 v1.13 Fix bug kindly reported by Dan Bodoh
icculus@10085
   140
 *                    (premature termination of recursion).
icculus@10085
   141
 *                    Add a few clarifying comments.
icculus@10085
   142
 *                    Minor improvements to debug output.
icculus@10085
   143
 *   2016-02-21 v1.14 Replace licence with 2-clause BSD,
icculus@10085
   144
 *                    and clarify a couple of things in
icculus@10085
   145
 *                    comments. No code changes.
icculus@10085
   146
 */
icculus@10068
   147
icculus@10085
   148
/* BEGIN SDL CHANGE ... commented this out with an #if 0 block. --ryan. */
icculus@10085
   149
#if 0
icculus@10085
   150
#include <assert.h>
icculus@10085
   151
#include <stdlib.h>
icculus@10085
   152
#include <string.h>
icculus@10068
   153
icculus@10085
   154
#define DEBUG_QSORT
icculus@10068
   155
icculus@10085
   156
static char _ID[]="<qsort.c gjm 1.14 2016-02-21>";
icculus@10085
   157
#endif
icculus@10085
   158
/* END SDL CHANGE ... commented this out with an #if 0 block. --ryan. */
icculus@10068
   159
icculus@10085
   160
/* How many bytes are there per word? (Must be a power of 2,
icculus@10085
   161
 * and must in fact equal sizeof(int).)
icculus@10085
   162
 */
icculus@10085
   163
#define WORD_BYTES sizeof(int)
icculus@10068
   164
icculus@10085
   165
/* How big does our stack need to be? Answer: one entry per
icculus@10085
   166
 * bit in a |size_t|.
icculus@10085
   167
 */
icculus@10085
   168
#define STACK_SIZE (8*sizeof(size_t))
icculus@10085
   169
icculus@10085
   170
/* Different situations have slightly different requirements,
icculus@10085
   171
 * and we make life epsilon easier by using different truncation
icculus@10085
   172
 * points for the three different cases.
icculus@10085
   173
 * So far, I have tuned TRUNC_words and guessed that the same
icculus@10085
   174
 * value might work well for the other two cases. Of course
icculus@10085
   175
 * what works well on my machine might work badly on yours.
icculus@10085
   176
 */
icculus@10085
   177
#define TRUNC_nonaligned	12
icculus@10085
   178
#define TRUNC_aligned		12
icculus@10085
   179
#define TRUNC_words		12*WORD_BYTES	/* nb different meaning */
icculus@10085
   180
icculus@10085
   181
/* We use a simple pivoting algorithm for shortish sub-arrays
icculus@10085
   182
 * and a more complicated one for larger ones. The threshold
icculus@10085
   183
 * is PIVOT_THRESHOLD.
icculus@10085
   184
 */
icculus@10085
   185
#define PIVOT_THRESHOLD 40
icculus@10085
   186
icculus@10085
   187
typedef struct { char * first; char * last; } stack_entry;
icculus@10085
   188
#define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
icculus@10085
   189
#define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
icculus@10085
   190
#define doLeft {first=ffirst;llast=last;continue;}
icculus@10085
   191
#define doRight {ffirst=first;last=llast;continue;}
icculus@10085
   192
#define pop {if (--stacktop<0) break;\
icculus@10085
   193
  first=ffirst=stack[stacktop].first;\
icculus@10085
   194
  last=llast=stack[stacktop].last;\
icculus@10085
   195
  continue;}
icculus@10085
   196
icculus@10085
   197
/* Some comments on the implementation.
icculus@10085
   198
 * 1. When we finish partitioning the array into "low"
icculus@10085
   199
 *    and "high", we forget entirely about short subarrays,
icculus@10085
   200
 *    because they'll be done later by insertion sort.
icculus@10085
   201
 *    Doing lots of little insertion sorts might be a win
icculus@10085
   202
 *    on large datasets for locality-of-reference reasons,
icculus@10085
   203
 *    but it makes the code much nastier and increases
icculus@10085
   204
 *    bookkeeping overhead.
icculus@10085
   205
 * 2. We always save the shorter and get to work on the
icculus@10085
   206
 *    longer. This guarantees that every time we push
icculus@10085
   207
 *    an item onto the stack its size is <= 1/2 of that
icculus@10085
   208
 *    of its parent; so the stack can't need more than
icculus@10085
   209
 *    log_2(max-array-size) entries.
icculus@10085
   210
 * 3. We choose a pivot by looking at the first, last
icculus@10085
   211
 *    and middle elements. We arrange them into order
icculus@10085
   212
 *    because it's easy to do that in conjunction with
icculus@10085
   213
 *    choosing the pivot, and it makes things a little
icculus@10085
   214
 *    easier in the partitioning step. Anyway, the pivot
icculus@10085
   215
 *    is the middle of these three. It's still possible
icculus@10085
   216
 *    to construct datasets where the algorithm takes
icculus@10085
   217
 *    time of order n^2, but it simply never happens in
icculus@10085
   218
 *    practice.
icculus@10085
   219
 * 3' Newsflash: On further investigation I find that
icculus@10085
   220
 *    it's easy to construct datasets where median-of-3
icculus@10085
   221
 *    simply isn't good enough. So on large-ish subarrays
icculus@10085
   222
 *    we do a more sophisticated pivoting: we take three
icculus@10085
   223
 *    sets of 3 elements, find their medians, and then
icculus@10085
   224
 *    take the median of those.
icculus@10085
   225
 * 4. We copy the pivot element to a separate place
icculus@10085
   226
 *    because that way we can always do our comparisons
icculus@10085
   227
 *    directly against a pointer to that separate place,
icculus@10085
   228
 *    and don't have to wonder "did we move the pivot
icculus@10085
   229
 *    element?". This makes the inner loop better.
icculus@10085
   230
 * 5. It's possible to make the pivoting even more
icculus@10085
   231
 *    reliable by looking at more candidates when n
icculus@10085
   232
 *    is larger. (Taking this to its logical conclusion
icculus@10085
   233
 *    results in a variant of quicksort that doesn't
icculus@10085
   234
 *    have that n^2 worst case.) However, the overhead
icculus@10085
   235
 *    from the extra bookkeeping means that it's just
icculus@10085
   236
 *    not worth while.
icculus@10085
   237
 * 6. This is pretty clean and portable code. Here are
icculus@10085
   238
 *    all the potential portability pitfalls and problems
icculus@10085
   239
 *    I know of:
icculus@10085
   240
 *      - In one place (the insertion sort) I construct
icculus@10085
   241
 *        a pointer that points just past the end of the
icculus@10085
   242
 *        supplied array, and assume that (a) it won't
icculus@10085
   243
 *        compare equal to any pointer within the array,
icculus@10085
   244
 *        and (b) it will compare equal to a pointer
icculus@10085
   245
 *        obtained by stepping off the end of the array.
icculus@10085
   246
 *        These might fail on some segmented architectures.
icculus@10085
   247
 *      - I assume that there are 8 bits in a |char| when
icculus@10085
   248
 *        computing the size of stack needed. This would
icculus@10085
   249
 *        fail on machines with 9-bit or 16-bit bytes.
icculus@10085
   250
 *      - I assume that if |((int)base&(sizeof(int)-1))==0|
icculus@10085
   251
 *        and |(size&(sizeof(int)-1))==0| then it's safe to
icculus@10085
   252
 *        get at array elements via |int*|s, and that if
icculus@10085
   253
 *        actually |size==sizeof(int)| as well then it's
icculus@10085
   254
 *        safe to treat the elements as |int|s. This might
icculus@10085
   255
 *        fail on systems that convert pointers to integers
icculus@10085
   256
 *        in non-standard ways.
icculus@10085
   257
 *      - I assume that |8*sizeof(size_t)<=INT_MAX|. This
icculus@10085
   258
 *        would be false on a machine with 8-bit |char|s,
icculus@10085
   259
 *        16-bit |int|s and 4096-bit |size_t|s. :-)
icculus@10085
   260
 */
icculus@10085
   261
icculus@10085
   262
/* The recursion logic is the same in each case.
icculus@10085
   263
 * We keep chopping up until we reach subarrays of size
icculus@10085
   264
 * strictly less than Trunc; we leave these unsorted. */
icculus@10085
   265
#define Recurse(Trunc)				\
icculus@10085
   266
      { size_t l=last-ffirst,r=llast-first;	\
icculus@10085
   267
        if (l<Trunc) {				\
icculus@10085
   268
          if (r>=Trunc) doRight			\
icculus@10085
   269
          else pop				\
icculus@10085
   270
        }					\
icculus@10085
   271
        else if (l<=r) { pushLeft; doRight }	\
icculus@10085
   272
        else if (r>=Trunc) { pushRight; doLeft }\
icculus@10085
   273
        else doLeft				\
icculus@10085
   274
      }
icculus@10085
   275
icculus@10085
   276
/* and so is the pivoting logic (note: last is inclusive): */
icculus@10085
   277
#define Pivot(swapper,sz)			\
slouken@10599
   278
  if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
icculus@10085
   279
  else {	\
icculus@10085
   280
    if (compare(first,mid)<0) {			\
icculus@10085
   281
      if (compare(mid,last)>0) {		\
icculus@10085
   282
        swapper(mid,last);			\
icculus@10085
   283
        if (compare(first,mid)>0) swapper(first,mid);\
icculus@10085
   284
      }						\
icculus@10085
   285
    }						\
icculus@10085
   286
    else {					\
icculus@10085
   287
      if (compare(mid,last)>0) swapper(first,last)\
icculus@10085
   288
      else {					\
icculus@10085
   289
        swapper(first,mid);			\
icculus@10085
   290
        if (compare(mid,last)>0) swapper(mid,last);\
icculus@10085
   291
      }						\
icculus@10085
   292
    }						\
icculus@10085
   293
    first+=sz; last-=sz;			\
icculus@10085
   294
  }
icculus@10085
   295
icculus@10085
   296
#ifdef DEBUG_QSORT
icculus@10085
   297
#include <stdio.h>
icculus@10085
   298
#endif
icculus@10085
   299
icculus@10085
   300
/* and so is the partitioning logic: */
icculus@10085
   301
#define Partition(swapper,sz) {			\
icculus@10085
   302
  do {						\
icculus@10085
   303
    while (compare(first,pivot)<0) first+=sz;	\
icculus@10085
   304
    while (compare(pivot,last)<0) last-=sz;	\
icculus@10085
   305
    if (first<last) {				\
icculus@10085
   306
      swapper(first,last);			\
icculus@10085
   307
      first+=sz; last-=sz; }			\
icculus@10085
   308
    else if (first==last) { first+=sz; last-=sz; break; }\
icculus@10085
   309
  } while (first<=last);			\
slouken@1330
   310
}
slouken@1330
   311
icculus@10085
   312
/* and so is the pre-insertion-sort operation of putting
icculus@10085
   313
 * the smallest element into place as a sentinel.
icculus@10085
   314
 * Doing this makes the inner loop nicer. I got this
icculus@10085
   315
 * idea from the GNU implementation of qsort().
icculus@10085
   316
 * We find the smallest element from the first |nmemb|,
icculus@10085
   317
 * or the first |limit|, whichever is smaller;
icculus@10085
   318
 * therefore we must have ensured that the globally smallest
icculus@10085
   319
 * element is in the first |limit|.
icculus@10085
   320
 */
icculus@10085
   321
#define PreInsertion(swapper,limit,sz)		\
icculus@10085
   322
  first=base;					\
icculus@10085
   323
  last=first + ((nmemb>limit ? limit : nmemb)-1)*sz;\
icculus@10085
   324
  while (last!=base) {				\
icculus@10085
   325
    if (compare(first,last)>0) first=last;	\
icculus@10085
   326
    last-=sz; }					\
icculus@10085
   327
  if (first!=base) swapper(first,(char*)base);
slouken@1330
   328
icculus@10085
   329
/* and so is the insertion sort, in the first two cases: */
icculus@10085
   330
#define Insertion(swapper)			\
icculus@10085
   331
  last=((char*)base)+nmemb*size;		\
icculus@10085
   332
  for (first=((char*)base)+size;first!=last;first+=size) {	\
icculus@10085
   333
    char *test;					\
icculus@10085
   334
    /* Find the right place for |first|.	\
icculus@10085
   335
     * My apologies for var reuse. */		\
icculus@10085
   336
    for (test=first-size;compare(test,first)>0;test-=size) ;	\
icculus@10085
   337
    test+=size;					\
icculus@10085
   338
    if (test!=first) {				\
icculus@10085
   339
      /* Shift everything in [test,first)	\
icculus@10085
   340
       * up by one, and place |first|		\
icculus@10085
   341
       * where |test| is. */			\
icculus@10085
   342
      memcpy(pivot,first,size);			\
icculus@10085
   343
      memmove(test+size,test,first-test);	\
icculus@10085
   344
      memcpy(test,pivot,size);			\
icculus@10085
   345
    }						\
icculus@10085
   346
  }
slouken@1330
   347
icculus@10085
   348
#define SWAP_nonaligned(a,b) { \
icculus@10085
   349
  register char *aa=(a),*bb=(b); \
icculus@10085
   350
  register size_t sz=size; \
icculus@10085
   351
  do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
slouken@1330
   352
icculus@10085
   353
#define SWAP_aligned(a,b) { \
icculus@10085
   354
  register int *aa=(int*)(a),*bb=(int*)(b); \
icculus@10085
   355
  register size_t sz=size; \
icculus@10085
   356
  do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
icculus@10085
   357
icculus@10085
   358
#define SWAP_words(a,b) { \
icculus@10085
   359
  register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
icculus@10085
   360
icculus@10085
   361
/* ---------------------------------------------------------------------- */
icculus@10085
   362
icculus@10085
   363
static char * pivot_big(char *first, char *mid, char *last, size_t size,
icculus@10085
   364
                        int compare(const void *, const void *)) {
icculus@10085
   365
  int d=(((last-first)/size)>>3)*size;
icculus@10085
   366
#ifdef DEBUG_QSORT
icculus@10085
   367
fprintf(stderr, "pivot_big: first=%p last=%p size=%lu n=%lu\n", first, (unsigned long)last, size, (unsigned long)((last-first+1)/size));
icculus@10085
   368
#endif
icculus@10085
   369
  char *m1,*m2,*m3;
icculus@10085
   370
  { char *a=first, *b=first+d, *c=first+2*d;
icculus@10085
   371
#ifdef DEBUG_QSORT
icculus@10085
   372
fprintf(stderr,"< %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
icculus@10085
   373
#endif
icculus@10085
   374
    m1 = compare(a,b)<0 ?
icculus@10085
   375
           (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
icculus@10085
   376
         : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
icculus@10085
   377
  }
icculus@10085
   378
  { char *a=mid-d, *b=mid, *c=mid+d;
icculus@10085
   379
#ifdef DEBUG_QSORT
icculus@10085
   380
fprintf(stderr,". %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
icculus@10085
   381
#endif
icculus@10085
   382
    m2 = compare(a,b)<0 ?
icculus@10085
   383
           (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
icculus@10085
   384
         : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
icculus@10085
   385
  }
icculus@10085
   386
  { char *a=last-2*d, *b=last-d, *c=last;
icculus@10085
   387
#ifdef DEBUG_QSORT
icculus@10085
   388
fprintf(stderr,"> %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
icculus@10085
   389
#endif
icculus@10085
   390
    m3 = compare(a,b)<0 ?
icculus@10085
   391
           (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
icculus@10085
   392
         : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
icculus@10085
   393
  }
icculus@10085
   394
#ifdef DEBUG_QSORT
icculus@10085
   395
fprintf(stderr,"-> %d %d %d @ %p %p %p\n",*(int*)m1,*(int*)m2,*(int*)m3, m1,m2,m3);
icculus@10085
   396
#endif
icculus@10085
   397
  return compare(m1,m2)<0 ?
icculus@10085
   398
           (compare(m2,m3)<0 ? m2 : (compare(m1,m3)<0 ? m3 : m1))
icculus@10085
   399
         : (compare(m1,m3)<0 ? m1 : (compare(m2,m3)<0 ? m3 : m2));
slouken@1330
   400
}
slouken@1330
   401
icculus@10085
   402
/* ---------------------------------------------------------------------- */
slouken@1330
   403
icculus@10085
   404
static void qsort_nonaligned(void *base, size_t nmemb, size_t size,
icculus@10085
   405
           int (*compare)(const void *, const void *)) {
icculus@10068
   406
icculus@10085
   407
  stack_entry stack[STACK_SIZE];
icculus@10085
   408
  int stacktop=0;
icculus@10085
   409
  char *first,*last;
icculus@10085
   410
  char *pivot=malloc(size);
icculus@10085
   411
  size_t trunc=TRUNC_nonaligned*size;
icculus@10085
   412
  assert(pivot!=0);
icculus@10085
   413
icculus@10085
   414
  first=(char*)base; last=first+(nmemb-1)*size;
icculus@10085
   415
slouken@10599
   416
  if ((size_t)(last-first)>=trunc) {
icculus@10085
   417
    char *ffirst=first, *llast=last;
icculus@10085
   418
    while (1) {
icculus@10085
   419
      /* Select pivot */
icculus@10085
   420
      { char * mid=first+size*((last-first)/size >> 1);
icculus@10085
   421
        Pivot(SWAP_nonaligned,size);
icculus@10085
   422
        memcpy(pivot,mid,size);
icculus@10085
   423
      }
icculus@10085
   424
      /* Partition. */
icculus@10085
   425
      Partition(SWAP_nonaligned,size);
icculus@10085
   426
      /* Prepare to recurse/iterate. */
icculus@10085
   427
      Recurse(trunc)
icculus@10085
   428
    }
icculus@10085
   429
  }
slouken@10116
   430
  PreInsertion(SWAP_nonaligned,TRUNC_nonaligned,size);
icculus@10085
   431
  Insertion(SWAP_nonaligned);
icculus@10085
   432
  free(pivot);
slouken@1330
   433
}
slouken@1330
   434
icculus@10085
   435
static void qsort_aligned(void *base, size_t nmemb, size_t size,
icculus@10085
   436
           int (*compare)(const void *, const void *)) {
icculus@10085
   437
icculus@10085
   438
  stack_entry stack[STACK_SIZE];
icculus@10085
   439
  int stacktop=0;
icculus@10085
   440
  char *first,*last;
icculus@10085
   441
  char *pivot=malloc(size);
icculus@10085
   442
  size_t trunc=TRUNC_aligned*size;
icculus@10085
   443
  assert(pivot!=0);
icculus@10085
   444
icculus@10085
   445
  first=(char*)base; last=first+(nmemb-1)*size;
icculus@10085
   446
slouken@10599
   447
  if ((size_t)(last-first)>=trunc) {
icculus@10085
   448
    char *ffirst=first,*llast=last;
icculus@10085
   449
    while (1) {
icculus@10085
   450
      /* Select pivot */
icculus@10085
   451
      { char * mid=first+size*((last-first)/size >> 1);
icculus@10085
   452
        Pivot(SWAP_aligned,size);
icculus@10085
   453
        memcpy(pivot,mid,size);
icculus@10085
   454
      }
icculus@10085
   455
      /* Partition. */
icculus@10085
   456
      Partition(SWAP_aligned,size);
icculus@10085
   457
      /* Prepare to recurse/iterate. */
icculus@10085
   458
      Recurse(trunc)
icculus@10085
   459
    }
icculus@10085
   460
  }
slouken@10116
   461
  PreInsertion(SWAP_aligned,TRUNC_aligned,size);
icculus@10085
   462
  Insertion(SWAP_aligned);
icculus@10085
   463
  free(pivot);
slouken@1330
   464
}
slouken@1330
   465
icculus@10085
   466
static void qsort_words(void *base, size_t nmemb,
icculus@10085
   467
           int (*compare)(const void *, const void *)) {
icculus@10085
   468
icculus@10085
   469
  stack_entry stack[STACK_SIZE];
icculus@10085
   470
  int stacktop=0;
icculus@10085
   471
  char *first,*last;
icculus@10085
   472
  char *pivot=malloc(WORD_BYTES);
icculus@10085
   473
  assert(pivot!=0);
icculus@10085
   474
icculus@10085
   475
  first=(char*)base; last=first+(nmemb-1)*WORD_BYTES;
icculus@10085
   476
icculus@10085
   477
  if (last-first>=TRUNC_words) {
icculus@10085
   478
    char *ffirst=first, *llast=last;
icculus@10085
   479
    while (1) {
icculus@10085
   480
#ifdef DEBUG_QSORT
icculus@10085
   481
fprintf(stderr,"Doing %d:%d: ",
icculus@10085
   482
        (first-(char*)base)/WORD_BYTES,
icculus@10085
   483
        (last-(char*)base)/WORD_BYTES);
icculus@10068
   484
#endif
icculus@10085
   485
      /* Select pivot */
icculus@10085
   486
      { char * mid=first+WORD_BYTES*((last-first) / (2*WORD_BYTES));
icculus@10085
   487
        Pivot(SWAP_words,WORD_BYTES);
icculus@10085
   488
        *(int*)pivot=*(int*)mid;
icculus@10085
   489
#ifdef DEBUG_QSORT
icculus@10085
   490
fprintf(stderr,"pivot = %p = #%lu = %d\n", mid, (unsigned long)(((int*)mid)-((int*)base)), *(int*)mid);
icculus@10085
   491
#endif
icculus@10085
   492
      }
icculus@10085
   493
      /* Partition. */
icculus@10085
   494
      Partition(SWAP_words,WORD_BYTES);
icculus@10085
   495
#ifdef DEBUG_QSORT
icculus@10085
   496
fprintf(stderr, "after partitioning first=#%lu last=#%lu\n", (first-(char*)base)/4lu, (last-(char*)base)/4lu);
icculus@10085
   497
#endif
icculus@10085
   498
      /* Prepare to recurse/iterate. */
icculus@10085
   499
      Recurse(TRUNC_words)
icculus@10085
   500
    }
icculus@10085
   501
  }
slouken@10116
   502
  PreInsertion(SWAP_words,(TRUNC_words/WORD_BYTES),WORD_BYTES);
icculus@10085
   503
  /* Now do insertion sort. */
icculus@10085
   504
  last=((char*)base)+nmemb*WORD_BYTES;
icculus@10085
   505
  for (first=((char*)base)+WORD_BYTES;first!=last;first+=WORD_BYTES) {
icculus@10085
   506
    /* Find the right place for |first|. My apologies for var reuse */
icculus@10085
   507
    int *pl=(int*)(first-WORD_BYTES),*pr=(int*)first;
icculus@10085
   508
    *(int*)pivot=*(int*)first;
icculus@10085
   509
    for (;compare(pl,pivot)>0;pr=pl,--pl) {
icculus@10085
   510
      *pr=*pl; }
icculus@10085
   511
    if (pr!=(int*)first) *pr=*(int*)pivot;
icculus@10085
   512
  }
icculus@10085
   513
  free(pivot);
icculus@10085
   514
}
icculus@10085
   515
icculus@10085
   516
/* ---------------------------------------------------------------------- */
icculus@10085
   517
icculus@10085
   518
extern void qsortG(void *base, size_t nmemb, size_t size,
icculus@10085
   519
           int (*compare)(const void *, const void *)) {
icculus@10085
   520
icculus@10085
   521
  if (nmemb<=1) return;
icculus@10085
   522
  if (((int)base|size)&(WORD_BYTES-1))
icculus@10085
   523
    qsort_nonaligned(base,nmemb,size,compare);
icculus@10085
   524
  else if (size!=WORD_BYTES)
icculus@10085
   525
    qsort_aligned(base,nmemb,size,compare);
icculus@10085
   526
  else
icculus@10085
   527
    qsort_words(base,nmemb,compare);
icculus@10085
   528
}
icculus@10085
   529
slouken@1330
   530
icculus@10068
   531
#endif /* HAVE_QSORT */
icculus@7003
   532
slouken@1895
   533
/* vi: set ts=4 sw=4 expandtab: */
icculus@10085
   534