src/stdlib/SDL_qsort.c
author Ryan C. Gordon <icculus@icculus.org>
Thu, 21 Apr 2016 03:16:44 -0400
changeset 11729 d1ce8396c356
parent 11468 a39f1b8ff998
child 11811 5d94cb6b24d3
permissions -rw-r--r--
Initial shot at a renderer target for Apple's Metal API.

This isn't complete, but is enough to run testsprite2. It's currently
Mac-only; with a little work to figure out how to properly glue in a Metal
layer to a UIView, this will likely work on iOS, too.

This is only wired up to the configure script right now, and disabled by
default. CMake and Xcode still need their bits filled in as appropriate.
icculus@10085
     1
/*
icculus@10085
     2
  Simple DirectMedia Layer
slouken@10737
     3
  Copyright (C) 1997-2017 Sam Lantinga <slouken@libsdl.org>
icculus@10085
     4
icculus@10085
     5
  This software is provided 'as-is', without any express or implied
icculus@10085
     6
  warranty.  In no event will the authors be held liable for any damages
icculus@10085
     7
  arising from the use of this software.
icculus@10085
     8
icculus@10085
     9
  Permission is granted to anyone to use this software for any purpose,
icculus@10085
    10
  including commercial applications, and to alter it and redistribute it
icculus@10085
    11
  freely, subject to the following restrictions:
icculus@10085
    12
icculus@10085
    13
  1. The origin of this software must not be misrepresented; you must not
icculus@10085
    14
     claim that you wrote the original software. If you use this software
icculus@10085
    15
     in a product, an acknowledgment in the product documentation would be
icculus@10085
    16
     appreciated but is not required.
icculus@10085
    17
  2. Altered source versions must be plainly marked as such, and must not be
icculus@10085
    18
     misrepresented as being the original software.
icculus@10085
    19
  3. This notice may not be removed or altered from any source distribution.
icculus@10085
    20
*/
icculus@10085
    21
icculus@10085
    22
#if defined(__clang_analyzer__) && !defined(SDL_DISABLE_ANALYZE_MACROS)
icculus@10085
    23
#define SDL_DISABLE_ANALYZE_MACROS 1
icculus@10085
    24
#endif
icculus@10085
    25
icculus@8093
    26
#include "../SDL_internal.h"
slouken@1330
    27
slouken@1354
    28
#include "SDL_stdinc.h"
icculus@6281
    29
#include "SDL_assert.h"
slouken@1330
    30
slouken@7351
    31
#if defined(HAVE_QSORT)
icculus@7003
    32
void
icculus@7003
    33
SDL_qsort(void *base, size_t nmemb, size_t size, int (*compare) (const void *, const void *))
icculus@7003
    34
{
slouken@7351
    35
    qsort(base, nmemb, size, compare);
icculus@7003
    36
}
icculus@10085
    37
icculus@7003
    38
#else
icculus@7003
    39
icculus@10085
    40
#ifdef assert
icculus@10085
    41
#undef assert
slouken@1330
    42
#endif
icculus@10085
    43
#define assert SDL_assert
icculus@10085
    44
#ifdef malloc
icculus@10085
    45
#undef malloc
icculus@10068
    46
#endif
icculus@10085
    47
#define malloc SDL_malloc
icculus@10085
    48
#ifdef free
icculus@10085
    49
#undef free
icculus@10068
    50
#endif
icculus@10085
    51
#define free SDL_free
icculus@10085
    52
#ifdef memcpy
icculus@10085
    53
#undef memcpy
icculus@10068
    54
#endif
icculus@10085
    55
#define memcpy SDL_memcpy
icculus@10085
    56
#ifdef memmove
icculus@10085
    57
#undef memmove
icculus@10069
    58
#endif
icculus@10085
    59
#define memmove SDL_memmove
icculus@10085
    60
#ifdef qsortG
icculus@10085
    61
#undef qsortG
icculus@10085
    62
#endif
icculus@10085
    63
#define qsortG SDL_qsort
icculus@10068
    64
icculus@10068
    65
/*
icculus@10085
    66
This code came from Gareth McCaughan, under the zlib license.
icculus@10085
    67
Specifically this: https://www.mccaughan.org.uk/software/qsort.c-1.14
icculus@10068
    68
icculus@10085
    69
Everything below this comment until the HAVE_QSORT #endif was from Gareth
icculus@10085
    70
(any minor changes will be noted inline).
icculus@10085
    71
icculus@10085
    72
Thank you to Gareth for relicensing this code under the zlib license for our
icculus@10085
    73
benefit!
icculus@10085
    74
icculus@10068
    75
--ryan.
icculus@10068
    76
*/
icculus@10068
    77
icculus@10085
    78
/* This is a drop-in replacement for the C library's |qsort()| routine.
icculus@10085
    79
 *
icculus@10085
    80
 * It is intended for use where you know or suspect that your
icculus@10085
    81
 * platform's qsort is bad. If that isn't the case, then you
icculus@10085
    82
 * should probably use the qsort your system gives you in preference
icculus@10085
    83
 * to mine -- it will likely have been tested and tuned better.
icculus@10085
    84
 *
icculus@10085
    85
 * Features:
icculus@10085
    86
 *   - Median-of-three pivoting (and more)
icculus@10085
    87
 *   - Truncation and final polishing by a single insertion sort
icculus@10085
    88
 *   - Early truncation when no swaps needed in pivoting step
icculus@10085
    89
 *   - Explicit recursion, guaranteed not to overflow
icculus@10085
    90
 *   - A few little wrinkles stolen from the GNU |qsort()|.
icculus@10085
    91
 *     (For the avoidance of doubt, no code was stolen, only
icculus@10085
    92
 *     broad ideas.)
icculus@10085
    93
 *   - separate code for non-aligned / aligned / word-size objects
icculus@10085
    94
 *
icculus@10085
    95
 * Earlier releases of this code used an idiosyncratic licence
icculus@10085
    96
 * I wrote myself, because I'm an idiot. The code is now released
icculus@10085
    97
 * under the "zlib/libpng licence"; you will find the actual
icculus@10085
    98
 * terms in the next comment. I request (but do not require)
icculus@10085
    99
 * that if you make any changes beyond the name of the exported
icculus@10085
   100
 * routine and reasonable tweaks to the TRUNC_* and
icculus@10085
   101
 * PIVOT_THRESHOLD values, you modify the _ID string so as
icculus@10085
   102
 * to make it clear that you have changed the code.
icculus@10085
   103
 *
icculus@10085
   104
 * If you find problems with this code, or find ways of
icculus@10085
   105
 * making it significantly faster, please let me know!
icculus@10085
   106
 * My e-mail address, valid as of early 2016 and for the
icculus@10085
   107
 * foreseeable future, is
icculus@10085
   108
 *    gareth.mccaughan@pobox.com
icculus@10085
   109
 * Thanks!
icculus@10085
   110
 *
icculus@10085
   111
 * Gareth McCaughan
icculus@10085
   112
 */
icculus@10068
   113
icculus@10085
   114
/* Copyright (c) 1998-2016 Gareth McCaughan
icculus@10085
   115
 *
icculus@10085
   116
 * This software is provided 'as-is', without any express or implied
icculus@10085
   117
 * warranty. In no event will the authors be held liable for any
icculus@10085
   118
 * damages arising from the use of this software.
icculus@10085
   119
 *
icculus@10085
   120
 * Permission is granted to anyone to use this software for any purpose,
icculus@10085
   121
 * including commercial applications, and to alter it and redistribute it
icculus@10085
   122
 * freely, subject to the following restrictions:
icculus@10085
   123
 *
icculus@10085
   124
 * 1. The origin of this software must not be misrepresented;
icculus@10085
   125
 *    you must not claim that you wrote the original software.
icculus@10085
   126
 *    If you use this software in a product, an acknowledgment
icculus@10085
   127
 *    in the product documentation would be appreciated but
icculus@10085
   128
 *    is not required.
icculus@10085
   129
 *
icculus@10085
   130
 * 2. Altered source versions must be plainly marked as such,
icculus@10085
   131
 *    and must not be misrepresented as being the original software.
icculus@10085
   132
 *
icculus@10085
   133
 * 3. This notice may not be removed or altered from any source
icculus@10085
   134
 *    distribution.
icculus@10085
   135
 */
icculus@10068
   136
icculus@10085
   137
/* Revision history since release:
icculus@10085
   138
 *   1998-03-19 v1.12 First release I have any records of.
icculus@10085
   139
 *   2007-09-02 v1.13 Fix bug kindly reported by Dan Bodoh
icculus@10085
   140
 *                    (premature termination of recursion).
icculus@10085
   141
 *                    Add a few clarifying comments.
icculus@10085
   142
 *                    Minor improvements to debug output.
icculus@10085
   143
 *   2016-02-21 v1.14 Replace licence with 2-clause BSD,
icculus@10085
   144
 *                    and clarify a couple of things in
icculus@10085
   145
 *                    comments. No code changes.
icculus@10085
   146
 */
icculus@10068
   147
icculus@10085
   148
/* BEGIN SDL CHANGE ... commented this out with an #if 0 block. --ryan. */
icculus@10085
   149
#if 0
icculus@10085
   150
#include <assert.h>
icculus@10085
   151
#include <stdlib.h>
icculus@10085
   152
#include <string.h>
icculus@10068
   153
icculus@10085
   154
#define DEBUG_QSORT
icculus@10068
   155
icculus@10085
   156
static char _ID[]="<qsort.c gjm 1.14 2016-02-21>";
icculus@10085
   157
#endif
icculus@10085
   158
/* END SDL CHANGE ... commented this out with an #if 0 block. --ryan. */
icculus@10068
   159
icculus@10085
   160
/* How many bytes are there per word? (Must be a power of 2,
icculus@10085
   161
 * and must in fact equal sizeof(int).)
icculus@10085
   162
 */
icculus@10085
   163
#define WORD_BYTES sizeof(int)
icculus@10068
   164
icculus@10085
   165
/* How big does our stack need to be? Answer: one entry per
icculus@10085
   166
 * bit in a |size_t|.
icculus@10085
   167
 */
icculus@10085
   168
#define STACK_SIZE (8*sizeof(size_t))
icculus@10085
   169
icculus@10085
   170
/* Different situations have slightly different requirements,
icculus@10085
   171
 * and we make life epsilon easier by using different truncation
icculus@10085
   172
 * points for the three different cases.
icculus@10085
   173
 * So far, I have tuned TRUNC_words and guessed that the same
icculus@10085
   174
 * value might work well for the other two cases. Of course
icculus@10085
   175
 * what works well on my machine might work badly on yours.
icculus@10085
   176
 */
icculus@10085
   177
#define TRUNC_nonaligned	12
icculus@10085
   178
#define TRUNC_aligned		12
icculus@10085
   179
#define TRUNC_words		12*WORD_BYTES	/* nb different meaning */
icculus@10085
   180
icculus@10085
   181
/* We use a simple pivoting algorithm for shortish sub-arrays
icculus@10085
   182
 * and a more complicated one for larger ones. The threshold
icculus@10085
   183
 * is PIVOT_THRESHOLD.
icculus@10085
   184
 */
icculus@10085
   185
#define PIVOT_THRESHOLD 40
icculus@10085
   186
icculus@10085
   187
typedef struct { char * first; char * last; } stack_entry;
icculus@10085
   188
#define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
icculus@10085
   189
#define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
icculus@10085
   190
#define doLeft {first=ffirst;llast=last;continue;}
icculus@10085
   191
#define doRight {ffirst=first;last=llast;continue;}
icculus@10085
   192
#define pop {if (--stacktop<0) break;\
icculus@10085
   193
  first=ffirst=stack[stacktop].first;\
icculus@10085
   194
  last=llast=stack[stacktop].last;\
icculus@10085
   195
  continue;}
icculus@10085
   196
icculus@10085
   197
/* Some comments on the implementation.
icculus@10085
   198
 * 1. When we finish partitioning the array into "low"
icculus@10085
   199
 *    and "high", we forget entirely about short subarrays,
icculus@10085
   200
 *    because they'll be done later by insertion sort.
icculus@10085
   201
 *    Doing lots of little insertion sorts might be a win
icculus@10085
   202
 *    on large datasets for locality-of-reference reasons,
icculus@10085
   203
 *    but it makes the code much nastier and increases
icculus@10085
   204
 *    bookkeeping overhead.
icculus@10085
   205
 * 2. We always save the shorter and get to work on the
icculus@10085
   206
 *    longer. This guarantees that every time we push
icculus@10085
   207
 *    an item onto the stack its size is <= 1/2 of that
icculus@10085
   208
 *    of its parent; so the stack can't need more than
icculus@10085
   209
 *    log_2(max-array-size) entries.
icculus@10085
   210
 * 3. We choose a pivot by looking at the first, last
icculus@10085
   211
 *    and middle elements. We arrange them into order
icculus@10085
   212
 *    because it's easy to do that in conjunction with
icculus@10085
   213
 *    choosing the pivot, and it makes things a little
icculus@10085
   214
 *    easier in the partitioning step. Anyway, the pivot
icculus@10085
   215
 *    is the middle of these three. It's still possible
icculus@10085
   216
 *    to construct datasets where the algorithm takes
icculus@10085
   217
 *    time of order n^2, but it simply never happens in
icculus@10085
   218
 *    practice.
icculus@10085
   219
 * 3' Newsflash: On further investigation I find that
icculus@10085
   220
 *    it's easy to construct datasets where median-of-3
icculus@10085
   221
 *    simply isn't good enough. So on large-ish subarrays
icculus@10085
   222
 *    we do a more sophisticated pivoting: we take three
icculus@10085
   223
 *    sets of 3 elements, find their medians, and then
icculus@10085
   224
 *    take the median of those.
icculus@10085
   225
 * 4. We copy the pivot element to a separate place
icculus@10085
   226
 *    because that way we can always do our comparisons
icculus@10085
   227
 *    directly against a pointer to that separate place,
icculus@10085
   228
 *    and don't have to wonder "did we move the pivot
icculus@10085
   229
 *    element?". This makes the inner loop better.
icculus@10085
   230
 * 5. It's possible to make the pivoting even more
icculus@10085
   231
 *    reliable by looking at more candidates when n
icculus@10085
   232
 *    is larger. (Taking this to its logical conclusion
icculus@10085
   233
 *    results in a variant of quicksort that doesn't
icculus@10085
   234
 *    have that n^2 worst case.) However, the overhead
icculus@10085
   235
 *    from the extra bookkeeping means that it's just
icculus@10085
   236
 *    not worth while.
icculus@10085
   237
 * 6. This is pretty clean and portable code. Here are
icculus@10085
   238
 *    all the potential portability pitfalls and problems
icculus@10085
   239
 *    I know of:
icculus@10085
   240
 *      - In one place (the insertion sort) I construct
icculus@10085
   241
 *        a pointer that points just past the end of the
icculus@10085
   242
 *        supplied array, and assume that (a) it won't
icculus@10085
   243
 *        compare equal to any pointer within the array,
icculus@10085
   244
 *        and (b) it will compare equal to a pointer
icculus@10085
   245
 *        obtained by stepping off the end of the array.
icculus@10085
   246
 *        These might fail on some segmented architectures.
icculus@10085
   247
 *      - I assume that there are 8 bits in a |char| when
icculus@10085
   248
 *        computing the size of stack needed. This would
icculus@10085
   249
 *        fail on machines with 9-bit or 16-bit bytes.
icculus@10085
   250
 *      - I assume that if |((int)base&(sizeof(int)-1))==0|
icculus@10085
   251
 *        and |(size&(sizeof(int)-1))==0| then it's safe to
icculus@10085
   252
 *        get at array elements via |int*|s, and that if
icculus@10085
   253
 *        actually |size==sizeof(int)| as well then it's
icculus@10085
   254
 *        safe to treat the elements as |int|s. This might
icculus@10085
   255
 *        fail on systems that convert pointers to integers
icculus@10085
   256
 *        in non-standard ways.
icculus@10085
   257
 *      - I assume that |8*sizeof(size_t)<=INT_MAX|. This
icculus@10085
   258
 *        would be false on a machine with 8-bit |char|s,
icculus@10085
   259
 *        16-bit |int|s and 4096-bit |size_t|s. :-)
icculus@10085
   260
 */
icculus@10085
   261
icculus@10085
   262
/* The recursion logic is the same in each case.
icculus@10085
   263
 * We keep chopping up until we reach subarrays of size
icculus@10085
   264
 * strictly less than Trunc; we leave these unsorted. */
icculus@10085
   265
#define Recurse(Trunc)				\
icculus@10085
   266
      { size_t l=last-ffirst,r=llast-first;	\
icculus@10085
   267
        if (l<Trunc) {				\
icculus@10085
   268
          if (r>=Trunc) doRight			\
icculus@10085
   269
          else pop				\
icculus@10085
   270
        }					\
icculus@10085
   271
        else if (l<=r) { pushLeft; doRight }	\
icculus@10085
   272
        else if (r>=Trunc) { pushRight; doLeft }\
icculus@10085
   273
        else doLeft				\
icculus@10085
   274
      }
icculus@10085
   275
icculus@10085
   276
/* and so is the pivoting logic (note: last is inclusive): */
icculus@10085
   277
#define Pivot(swapper,sz)			\
slouken@10599
   278
  if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
icculus@10085
   279
  else {	\
icculus@10085
   280
    if (compare(first,mid)<0) {			\
icculus@10085
   281
      if (compare(mid,last)>0) {		\
icculus@10085
   282
        swapper(mid,last);			\
icculus@10085
   283
        if (compare(first,mid)>0) swapper(first,mid);\
icculus@10085
   284
      }						\
icculus@10085
   285
    }						\
icculus@10085
   286
    else {					\
icculus@10085
   287
      if (compare(mid,last)>0) swapper(first,last)\
icculus@10085
   288
      else {					\
icculus@10085
   289
        swapper(first,mid);			\
icculus@10085
   290
        if (compare(mid,last)>0) swapper(mid,last);\
icculus@10085
   291
      }						\
icculus@10085
   292
    }						\
icculus@10085
   293
    first+=sz; last-=sz;			\
icculus@10085
   294
  }
icculus@10085
   295
icculus@10085
   296
#ifdef DEBUG_QSORT
icculus@10085
   297
#include <stdio.h>
icculus@10085
   298
#endif
icculus@10085
   299
icculus@10085
   300
/* and so is the partitioning logic: */
icculus@10085
   301
#define Partition(swapper,sz) {			\
icculus@10085
   302
  do {						\
icculus@10085
   303
    while (compare(first,pivot)<0) first+=sz;	\
icculus@10085
   304
    while (compare(pivot,last)<0) last-=sz;	\
icculus@10085
   305
    if (first<last) {				\
icculus@10085
   306
      swapper(first,last);			\
icculus@10085
   307
      first+=sz; last-=sz; }			\
icculus@10085
   308
    else if (first==last) { first+=sz; last-=sz; break; }\
icculus@10085
   309
  } while (first<=last);			\
slouken@1330
   310
}
slouken@1330
   311
icculus@10085
   312
/* and so is the pre-insertion-sort operation of putting
icculus@10085
   313
 * the smallest element into place as a sentinel.
icculus@10085
   314
 * Doing this makes the inner loop nicer. I got this
icculus@10085
   315
 * idea from the GNU implementation of qsort().
icculus@10085
   316
 * We find the smallest element from the first |nmemb|,
icculus@10085
   317
 * or the first |limit|, whichever is smaller;
icculus@10085
   318
 * therefore we must have ensured that the globally smallest
icculus@10085
   319
 * element is in the first |limit|.
icculus@10085
   320
 */
icculus@10085
   321
#define PreInsertion(swapper,limit,sz)		\
icculus@10085
   322
  first=base;					\
icculus@10085
   323
  last=first + ((nmemb>limit ? limit : nmemb)-1)*sz;\
icculus@10085
   324
  while (last!=base) {				\
icculus@10085
   325
    if (compare(first,last)>0) first=last;	\
icculus@10085
   326
    last-=sz; }					\
icculus@10085
   327
  if (first!=base) swapper(first,(char*)base);
slouken@1330
   328
icculus@10085
   329
/* and so is the insertion sort, in the first two cases: */
icculus@10085
   330
#define Insertion(swapper)			\
icculus@10085
   331
  last=((char*)base)+nmemb*size;		\
icculus@10085
   332
  for (first=((char*)base)+size;first!=last;first+=size) {	\
icculus@10085
   333
    char *test;					\
icculus@10085
   334
    /* Find the right place for |first|.	\
icculus@10085
   335
     * My apologies for var reuse. */		\
icculus@10085
   336
    for (test=first-size;compare(test,first)>0;test-=size) ;	\
icculus@10085
   337
    test+=size;					\
icculus@10085
   338
    if (test!=first) {				\
icculus@10085
   339
      /* Shift everything in [test,first)	\
icculus@10085
   340
       * up by one, and place |first|		\
icculus@10085
   341
       * where |test| is. */			\
icculus@10085
   342
      memcpy(pivot,first,size);			\
icculus@10085
   343
      memmove(test+size,test,first-test);	\
icculus@10085
   344
      memcpy(test,pivot,size);			\
icculus@10085
   345
    }						\
icculus@10085
   346
  }
slouken@1330
   347
icculus@10085
   348
#define SWAP_nonaligned(a,b) { \
icculus@10085
   349
  register char *aa=(a),*bb=(b); \
icculus@10085
   350
  register size_t sz=size; \
icculus@10085
   351
  do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
slouken@1330
   352
icculus@10085
   353
#define SWAP_aligned(a,b) { \
icculus@10085
   354
  register int *aa=(int*)(a),*bb=(int*)(b); \
icculus@10085
   355
  register size_t sz=size; \
icculus@10085
   356
  do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
icculus@10085
   357
icculus@10085
   358
#define SWAP_words(a,b) { \
icculus@10085
   359
  register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
icculus@10085
   360
icculus@10085
   361
/* ---------------------------------------------------------------------- */
icculus@10085
   362
icculus@10085
   363
static char * pivot_big(char *first, char *mid, char *last, size_t size,
icculus@10085
   364
                        int compare(const void *, const void *)) {
icculus@11416
   365
  size_t d=(((last-first)/size)>>3)*size;
icculus@10085
   366
#ifdef DEBUG_QSORT
icculus@10085
   367
fprintf(stderr, "pivot_big: first=%p last=%p size=%lu n=%lu\n", first, (unsigned long)last, size, (unsigned long)((last-first+1)/size));
icculus@10085
   368
#endif
icculus@10085
   369
  char *m1,*m2,*m3;
icculus@10085
   370
  { char *a=first, *b=first+d, *c=first+2*d;
icculus@10085
   371
#ifdef DEBUG_QSORT
icculus@10085
   372
fprintf(stderr,"< %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
icculus@10085
   373
#endif
icculus@10085
   374
    m1 = compare(a,b)<0 ?
icculus@10085
   375
           (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
icculus@10085
   376
         : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
icculus@10085
   377
  }
icculus@10085
   378
  { char *a=mid-d, *b=mid, *c=mid+d;
icculus@10085
   379
#ifdef DEBUG_QSORT
icculus@10085
   380
fprintf(stderr,". %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
icculus@10085
   381
#endif
icculus@10085
   382
    m2 = compare(a,b)<0 ?
icculus@10085
   383
           (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
icculus@10085
   384
         : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
icculus@10085
   385
  }
icculus@10085
   386
  { char *a=last-2*d, *b=last-d, *c=last;
icculus@10085
   387
#ifdef DEBUG_QSORT
icculus@10085
   388
fprintf(stderr,"> %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
icculus@10085
   389
#endif
icculus@10085
   390
    m3 = compare(a,b)<0 ?
icculus@10085
   391
           (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
icculus@10085
   392
         : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
icculus@10085
   393
  }
icculus@10085
   394
#ifdef DEBUG_QSORT
icculus@10085
   395
fprintf(stderr,"-> %d %d %d @ %p %p %p\n",*(int*)m1,*(int*)m2,*(int*)m3, m1,m2,m3);
icculus@10085
   396
#endif
icculus@10085
   397
  return compare(m1,m2)<0 ?
icculus@10085
   398
           (compare(m2,m3)<0 ? m2 : (compare(m1,m3)<0 ? m3 : m1))
icculus@10085
   399
         : (compare(m1,m3)<0 ? m1 : (compare(m2,m3)<0 ? m3 : m2));
slouken@1330
   400
}
slouken@1330
   401
icculus@10085
   402
/* ---------------------------------------------------------------------- */
slouken@1330
   403
icculus@10085
   404
static void qsort_nonaligned(void *base, size_t nmemb, size_t size,
icculus@10085
   405
           int (*compare)(const void *, const void *)) {
icculus@10068
   406
icculus@10085
   407
  stack_entry stack[STACK_SIZE];
icculus@10085
   408
  int stacktop=0;
icculus@10085
   409
  char *first,*last;
icculus@10085
   410
  char *pivot=malloc(size);
icculus@10085
   411
  size_t trunc=TRUNC_nonaligned*size;
icculus@10085
   412
  assert(pivot!=0);
icculus@10085
   413
icculus@10085
   414
  first=(char*)base; last=first+(nmemb-1)*size;
icculus@10085
   415
slouken@10599
   416
  if ((size_t)(last-first)>=trunc) {
icculus@10085
   417
    char *ffirst=first, *llast=last;
icculus@10085
   418
    while (1) {
icculus@10085
   419
      /* Select pivot */
icculus@10085
   420
      { char * mid=first+size*((last-first)/size >> 1);
icculus@10085
   421
        Pivot(SWAP_nonaligned,size);
icculus@10085
   422
        memcpy(pivot,mid,size);
icculus@10085
   423
      }
icculus@10085
   424
      /* Partition. */
icculus@10085
   425
      Partition(SWAP_nonaligned,size);
icculus@10085
   426
      /* Prepare to recurse/iterate. */
icculus@10085
   427
      Recurse(trunc)
icculus@10085
   428
    }
icculus@10085
   429
  }
slouken@10116
   430
  PreInsertion(SWAP_nonaligned,TRUNC_nonaligned,size);
icculus@10085
   431
  Insertion(SWAP_nonaligned);
icculus@10085
   432
  free(pivot);
slouken@1330
   433
}
slouken@1330
   434
icculus@10085
   435
static void qsort_aligned(void *base, size_t nmemb, size_t size,
icculus@10085
   436
           int (*compare)(const void *, const void *)) {
icculus@10085
   437
icculus@10085
   438
  stack_entry stack[STACK_SIZE];
icculus@10085
   439
  int stacktop=0;
icculus@10085
   440
  char *first,*last;
icculus@10085
   441
  char *pivot=malloc(size);
icculus@10085
   442
  size_t trunc=TRUNC_aligned*size;
icculus@10085
   443
  assert(pivot!=0);
icculus@10085
   444
icculus@10085
   445
  first=(char*)base; last=first+(nmemb-1)*size;
icculus@10085
   446
slouken@10599
   447
  if ((size_t)(last-first)>=trunc) {
icculus@10085
   448
    char *ffirst=first,*llast=last;
icculus@10085
   449
    while (1) {
icculus@10085
   450
      /* Select pivot */
icculus@10085
   451
      { char * mid=first+size*((last-first)/size >> 1);
icculus@10085
   452
        Pivot(SWAP_aligned,size);
icculus@10085
   453
        memcpy(pivot,mid,size);
icculus@10085
   454
      }
icculus@10085
   455
      /* Partition. */
icculus@10085
   456
      Partition(SWAP_aligned,size);
icculus@10085
   457
      /* Prepare to recurse/iterate. */
icculus@10085
   458
      Recurse(trunc)
icculus@10085
   459
    }
icculus@10085
   460
  }
slouken@10116
   461
  PreInsertion(SWAP_aligned,TRUNC_aligned,size);
icculus@10085
   462
  Insertion(SWAP_aligned);
icculus@10085
   463
  free(pivot);
slouken@1330
   464
}
slouken@1330
   465
icculus@10085
   466
static void qsort_words(void *base, size_t nmemb,
icculus@10085
   467
           int (*compare)(const void *, const void *)) {
icculus@10085
   468
icculus@10085
   469
  stack_entry stack[STACK_SIZE];
icculus@10085
   470
  int stacktop=0;
icculus@10085
   471
  char *first,*last;
icculus@10085
   472
  char *pivot=malloc(WORD_BYTES);
icculus@10085
   473
  assert(pivot!=0);
icculus@10085
   474
icculus@10085
   475
  first=(char*)base; last=first+(nmemb-1)*WORD_BYTES;
icculus@10085
   476
icculus@10085
   477
  if (last-first>=TRUNC_words) {
icculus@10085
   478
    char *ffirst=first, *llast=last;
icculus@10085
   479
    while (1) {
icculus@10085
   480
#ifdef DEBUG_QSORT
icculus@10085
   481
fprintf(stderr,"Doing %d:%d: ",
icculus@10085
   482
        (first-(char*)base)/WORD_BYTES,
icculus@10085
   483
        (last-(char*)base)/WORD_BYTES);
icculus@10068
   484
#endif
icculus@10085
   485
      /* Select pivot */
icculus@10085
   486
      { char * mid=first+WORD_BYTES*((last-first) / (2*WORD_BYTES));
icculus@10085
   487
        Pivot(SWAP_words,WORD_BYTES);
icculus@10085
   488
        *(int*)pivot=*(int*)mid;
icculus@10085
   489
#ifdef DEBUG_QSORT
icculus@10085
   490
fprintf(stderr,"pivot = %p = #%lu = %d\n", mid, (unsigned long)(((int*)mid)-((int*)base)), *(int*)mid);
icculus@10085
   491
#endif
icculus@10085
   492
      }
icculus@10085
   493
      /* Partition. */
icculus@10085
   494
      Partition(SWAP_words,WORD_BYTES);
icculus@10085
   495
#ifdef DEBUG_QSORT
icculus@10085
   496
fprintf(stderr, "after partitioning first=#%lu last=#%lu\n", (first-(char*)base)/4lu, (last-(char*)base)/4lu);
icculus@10085
   497
#endif
icculus@10085
   498
      /* Prepare to recurse/iterate. */
icculus@10085
   499
      Recurse(TRUNC_words)
icculus@10085
   500
    }
icculus@10085
   501
  }
slouken@10116
   502
  PreInsertion(SWAP_words,(TRUNC_words/WORD_BYTES),WORD_BYTES);
icculus@10085
   503
  /* Now do insertion sort. */
icculus@10085
   504
  last=((char*)base)+nmemb*WORD_BYTES;
icculus@10085
   505
  for (first=((char*)base)+WORD_BYTES;first!=last;first+=WORD_BYTES) {
icculus@10085
   506
    /* Find the right place for |first|. My apologies for var reuse */
icculus@10085
   507
    int *pl=(int*)(first-WORD_BYTES),*pr=(int*)first;
icculus@10085
   508
    *(int*)pivot=*(int*)first;
icculus@10085
   509
    for (;compare(pl,pivot)>0;pr=pl,--pl) {
icculus@10085
   510
      *pr=*pl; }
icculus@10085
   511
    if (pr!=(int*)first) *pr=*(int*)pivot;
icculus@10085
   512
  }
icculus@10085
   513
  free(pivot);
icculus@10085
   514
}
icculus@10085
   515
icculus@10085
   516
/* ---------------------------------------------------------------------- */
icculus@10085
   517
icculus@10085
   518
extern void qsortG(void *base, size_t nmemb, size_t size,
icculus@10085
   519
           int (*compare)(const void *, const void *)) {
icculus@10085
   520
icculus@10085
   521
  if (nmemb<=1) return;
slouken@11468
   522
  if (((size_t)base|size)&(WORD_BYTES-1))
icculus@10085
   523
    qsort_nonaligned(base,nmemb,size,compare);
icculus@10085
   524
  else if (size!=WORD_BYTES)
icculus@10085
   525
    qsort_aligned(base,nmemb,size,compare);
icculus@10085
   526
  else
icculus@10085
   527
    qsort_words(base,nmemb,compare);
icculus@10085
   528
}
icculus@10085
   529
slouken@1330
   530
icculus@10068
   531
#endif /* HAVE_QSORT */
icculus@7003
   532
slouken@1895
   533
/* vi: set ts=4 sw=4 expandtab: */
icculus@10085
   534