slouken@1330
|
1 |
/* qsort.c
|
slouken@1330
|
2 |
* (c) 1998 Gareth McCaughan
|
slouken@1330
|
3 |
*
|
slouken@1330
|
4 |
* This is a drop-in replacement for the C library's |qsort()| routine.
|
slouken@1330
|
5 |
*
|
slouken@1330
|
6 |
* Features:
|
slouken@1330
|
7 |
* - Median-of-three pivoting (and more)
|
slouken@1330
|
8 |
* - Truncation and final polishing by a single insertion sort
|
slouken@1330
|
9 |
* - Early truncation when no swaps needed in pivoting step
|
slouken@1330
|
10 |
* - Explicit recursion, guaranteed not to overflow
|
slouken@1330
|
11 |
* - A few little wrinkles stolen from the GNU |qsort()|.
|
slouken@1330
|
12 |
* - separate code for non-aligned / aligned / word-size objects
|
slouken@1330
|
13 |
*
|
slouken@1330
|
14 |
* This code may be reproduced freely provided
|
slouken@1330
|
15 |
* - this file is retained unaltered apart from minor
|
slouken@1330
|
16 |
* changes for portability and efficiency
|
slouken@1330
|
17 |
* - no changes are made to this comment
|
slouken@1330
|
18 |
* - any changes that *are* made are clearly flagged
|
slouken@1330
|
19 |
* - the _ID string below is altered by inserting, after
|
slouken@1330
|
20 |
* the date, the string " altered" followed at your option
|
slouken@1330
|
21 |
* by other material. (Exceptions: you may change the name
|
slouken@1330
|
22 |
* of the exported routine without changing the ID string.
|
slouken@1330
|
23 |
* You may change the values of the macros TRUNC_* and
|
slouken@1330
|
24 |
* PIVOT_THRESHOLD without changing the ID string, provided
|
slouken@1330
|
25 |
* they remain constants with TRUNC_nonaligned, TRUNC_aligned
|
slouken@1330
|
26 |
* and TRUNC_words/WORD_BYTES between 8 and 24, and
|
slouken@1330
|
27 |
* PIVOT_THRESHOLD between 32 and 200.)
|
slouken@1330
|
28 |
*
|
slouken@1330
|
29 |
* You may use it in anything you like; you may make money
|
slouken@1330
|
30 |
* out of it; you may distribute it in object form or as
|
slouken@1330
|
31 |
* part of an executable without including source code;
|
slouken@1330
|
32 |
* you don't have to credit me. (But it would be nice if
|
slouken@1330
|
33 |
* you did.)
|
slouken@1330
|
34 |
*
|
slouken@1330
|
35 |
* If you find problems with this code, or find ways of
|
slouken@1330
|
36 |
* making it significantly faster, please let me know!
|
slouken@1330
|
37 |
* My e-mail address, valid as of early 1998 and certainly
|
slouken@1330
|
38 |
* OK for at least the next 18 months, is
|
slouken@1330
|
39 |
* gjm11@dpmms.cam.ac.uk
|
slouken@1330
|
40 |
* Thanks!
|
slouken@1330
|
41 |
*
|
slouken@1330
|
42 |
* Gareth McCaughan Peterhouse Cambridge 1998
|
slouken@1330
|
43 |
*/
|
slouken@1402
|
44 |
#include "SDL_config.h"
|
slouken@1330
|
45 |
|
slouken@1330
|
46 |
/*
|
slouken@1330
|
47 |
#include <assert.h>
|
slouken@1330
|
48 |
#include <stdlib.h>
|
slouken@1330
|
49 |
#include <string.h>
|
slouken@1330
|
50 |
*/
|
slouken@1354
|
51 |
#include "SDL_stdinc.h"
|
slouken@1330
|
52 |
|
slouken@1337
|
53 |
#define assert(X)
|
slouken@1337
|
54 |
#define malloc SDL_malloc
|
slouken@1337
|
55 |
#define free SDL_free
|
slouken@1337
|
56 |
#define memcpy SDL_memcpy
|
slouken@1337
|
57 |
#define memmove SDL_memmove
|
slouken@1337
|
58 |
#define qsort SDL_qsort
|
slouken@1337
|
59 |
|
slouken@1337
|
60 |
|
slouken@1330
|
61 |
#ifndef HAVE_QSORT
|
slouken@1330
|
62 |
|
slouken@3162
|
63 |
static const char _ID[] = "<qsort.c gjm 1.12 1998-03-19>";
|
slouken@1330
|
64 |
|
slouken@1330
|
65 |
/* How many bytes are there per word? (Must be a power of 2,
|
slouken@1330
|
66 |
* and must in fact equal sizeof(int).)
|
slouken@1330
|
67 |
*/
|
slouken@1330
|
68 |
#define WORD_BYTES sizeof(int)
|
slouken@1330
|
69 |
|
slouken@1330
|
70 |
/* How big does our stack need to be? Answer: one entry per
|
slouken@1330
|
71 |
* bit in a |size_t|.
|
slouken@1330
|
72 |
*/
|
slouken@1330
|
73 |
#define STACK_SIZE (8*sizeof(size_t))
|
slouken@1330
|
74 |
|
slouken@1330
|
75 |
/* Different situations have slightly different requirements,
|
slouken@1330
|
76 |
* and we make life epsilon easier by using different truncation
|
slouken@1330
|
77 |
* points for the three different cases.
|
slouken@1330
|
78 |
* So far, I have tuned TRUNC_words and guessed that the same
|
slouken@1330
|
79 |
* value might work well for the other two cases. Of course
|
slouken@1330
|
80 |
* what works well on my machine might work badly on yours.
|
slouken@1330
|
81 |
*/
|
slouken@1330
|
82 |
#define TRUNC_nonaligned 12
|
slouken@1330
|
83 |
#define TRUNC_aligned 12
|
slouken@1895
|
84 |
#define TRUNC_words 12*WORD_BYTES /* nb different meaning */
|
slouken@1330
|
85 |
|
slouken@1330
|
86 |
/* We use a simple pivoting algorithm for shortish sub-arrays
|
slouken@1330
|
87 |
* and a more complicated one for larger ones. The threshold
|
slouken@1330
|
88 |
* is PIVOT_THRESHOLD.
|
slouken@1330
|
89 |
*/
|
slouken@1330
|
90 |
#define PIVOT_THRESHOLD 40
|
slouken@1330
|
91 |
|
slouken@1895
|
92 |
typedef struct
|
slouken@1895
|
93 |
{
|
slouken@1895
|
94 |
char *first;
|
slouken@1895
|
95 |
char *last;
|
slouken@1895
|
96 |
} stack_entry;
|
slouken@1330
|
97 |
#define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
|
slouken@1330
|
98 |
#define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
|
slouken@1330
|
99 |
#define doLeft {first=ffirst;llast=last;continue;}
|
slouken@1330
|
100 |
#define doRight {ffirst=first;last=llast;continue;}
|
slouken@1330
|
101 |
#define pop {if (--stacktop<0) break;\
|
slouken@1330
|
102 |
first=ffirst=stack[stacktop].first;\
|
slouken@1330
|
103 |
last=llast=stack[stacktop].last;\
|
slouken@1330
|
104 |
continue;}
|
slouken@1330
|
105 |
|
slouken@1330
|
106 |
/* Some comments on the implementation.
|
slouken@1330
|
107 |
* 1. When we finish partitioning the array into "low"
|
slouken@1330
|
108 |
* and "high", we forget entirely about short subarrays,
|
slouken@1330
|
109 |
* because they'll be done later by insertion sort.
|
slouken@1330
|
110 |
* Doing lots of little insertion sorts might be a win
|
slouken@1330
|
111 |
* on large datasets for locality-of-reference reasons,
|
slouken@1330
|
112 |
* but it makes the code much nastier and increases
|
slouken@1330
|
113 |
* bookkeeping overhead.
|
slouken@1330
|
114 |
* 2. We always save the shorter and get to work on the
|
slouken@1330
|
115 |
* longer. This guarantees that every time we push
|
slouken@1330
|
116 |
* an item onto the stack its size is <= 1/2 of that
|
slouken@1330
|
117 |
* of its parent; so the stack can't need more than
|
slouken@1330
|
118 |
* log_2(max-array-size) entries.
|
slouken@1330
|
119 |
* 3. We choose a pivot by looking at the first, last
|
slouken@1330
|
120 |
* and middle elements. We arrange them into order
|
slouken@1330
|
121 |
* because it's easy to do that in conjunction with
|
slouken@1330
|
122 |
* choosing the pivot, and it makes things a little
|
slouken@1330
|
123 |
* easier in the partitioning step. Anyway, the pivot
|
slouken@1330
|
124 |
* is the middle of these three. It's still possible
|
slouken@1330
|
125 |
* to construct datasets where the algorithm takes
|
slouken@1330
|
126 |
* time of order n^2, but it simply never happens in
|
slouken@1330
|
127 |
* practice.
|
slouken@1330
|
128 |
* 3' Newsflash: On further investigation I find that
|
slouken@1330
|
129 |
* it's easy to construct datasets where median-of-3
|
slouken@1330
|
130 |
* simply isn't good enough. So on large-ish subarrays
|
slouken@1330
|
131 |
* we do a more sophisticated pivoting: we take three
|
slouken@1330
|
132 |
* sets of 3 elements, find their medians, and then
|
slouken@1330
|
133 |
* take the median of those.
|
slouken@1330
|
134 |
* 4. We copy the pivot element to a separate place
|
slouken@1330
|
135 |
* because that way we can always do our comparisons
|
slouken@1330
|
136 |
* directly against a pointer to that separate place,
|
slouken@1330
|
137 |
* and don't have to wonder "did we move the pivot
|
slouken@1330
|
138 |
* element?". This makes the inner loop better.
|
slouken@1330
|
139 |
* 5. It's possible to make the pivoting even more
|
slouken@1330
|
140 |
* reliable by looking at more candidates when n
|
slouken@1330
|
141 |
* is larger. (Taking this to its logical conclusion
|
slouken@1330
|
142 |
* results in a variant of quicksort that doesn't
|
slouken@1330
|
143 |
* have that n^2 worst case.) However, the overhead
|
slouken@1330
|
144 |
* from the extra bookkeeping means that it's just
|
slouken@1330
|
145 |
* not worth while.
|
slouken@1330
|
146 |
* 6. This is pretty clean and portable code. Here are
|
slouken@1330
|
147 |
* all the potential portability pitfalls and problems
|
slouken@1330
|
148 |
* I know of:
|
slouken@1330
|
149 |
* - In one place (the insertion sort) I construct
|
slouken@1330
|
150 |
* a pointer that points just past the end of the
|
slouken@1330
|
151 |
* supplied array, and assume that (a) it won't
|
slouken@1330
|
152 |
* compare equal to any pointer within the array,
|
slouken@1330
|
153 |
* and (b) it will compare equal to a pointer
|
slouken@1330
|
154 |
* obtained by stepping off the end of the array.
|
slouken@1330
|
155 |
* These might fail on some segmented architectures.
|
slouken@1330
|
156 |
* - I assume that there are 8 bits in a |char| when
|
slouken@1330
|
157 |
* computing the size of stack needed. This would
|
slouken@1330
|
158 |
* fail on machines with 9-bit or 16-bit bytes.
|
slouken@1330
|
159 |
* - I assume that if |((int)base&(sizeof(int)-1))==0|
|
slouken@1330
|
160 |
* and |(size&(sizeof(int)-1))==0| then it's safe to
|
slouken@1330
|
161 |
* get at array elements via |int*|s, and that if
|
slouken@1330
|
162 |
* actually |size==sizeof(int)| as well then it's
|
slouken@1330
|
163 |
* safe to treat the elements as |int|s. This might
|
slouken@1330
|
164 |
* fail on systems that convert pointers to integers
|
slouken@1330
|
165 |
* in non-standard ways.
|
slouken@1330
|
166 |
* - I assume that |8*sizeof(size_t)<=INT_MAX|. This
|
slouken@1330
|
167 |
* would be false on a machine with 8-bit |char|s,
|
slouken@1330
|
168 |
* 16-bit |int|s and 4096-bit |size_t|s. :-)
|
slouken@1330
|
169 |
*/
|
slouken@1330
|
170 |
|
slouken@1330
|
171 |
/* The recursion logic is the same in each case: */
|
slouken@1330
|
172 |
#define Recurse(Trunc) \
|
slouken@1330
|
173 |
{ size_t l=last-ffirst,r=llast-first; \
|
slouken@1330
|
174 |
if (l<Trunc) { \
|
slouken@1330
|
175 |
if (r>=Trunc) doRight \
|
slouken@1330
|
176 |
else pop \
|
slouken@1330
|
177 |
} \
|
slouken@1330
|
178 |
else if (l<=r) { pushLeft; doRight } \
|
slouken@1330
|
179 |
else if (r>=Trunc) { pushRight; doLeft }\
|
slouken@1330
|
180 |
else doLeft \
|
slouken@1330
|
181 |
}
|
slouken@1330
|
182 |
|
slouken@1330
|
183 |
/* and so is the pivoting logic: */
|
slouken@1330
|
184 |
#define Pivot(swapper,sz) \
|
slouken@1330
|
185 |
if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
|
slouken@1330
|
186 |
else { \
|
slouken@1330
|
187 |
if (compare(first,mid)<0) { \
|
slouken@1330
|
188 |
if (compare(mid,last)>0) { \
|
slouken@1330
|
189 |
swapper(mid,last); \
|
slouken@1330
|
190 |
if (compare(first,mid)>0) swapper(first,mid);\
|
slouken@1330
|
191 |
} \
|
slouken@1330
|
192 |
} \
|
slouken@1330
|
193 |
else { \
|
slouken@1330
|
194 |
if (compare(mid,last)>0) swapper(first,last)\
|
slouken@1330
|
195 |
else { \
|
slouken@1330
|
196 |
swapper(first,mid); \
|
slouken@1330
|
197 |
if (compare(mid,last)>0) swapper(mid,last);\
|
slouken@1330
|
198 |
} \
|
slouken@1330
|
199 |
} \
|
slouken@1330
|
200 |
first+=sz; last-=sz; \
|
slouken@1330
|
201 |
}
|
slouken@1330
|
202 |
|
slouken@1330
|
203 |
#ifdef DEBUG_QSORT
|
slouken@1330
|
204 |
#include <stdio.h>
|
slouken@1330
|
205 |
#endif
|
slouken@1330
|
206 |
|
slouken@1330
|
207 |
/* and so is the partitioning logic: */
|
slouken@1330
|
208 |
#define Partition(swapper,sz) { \
|
slouken@1330
|
209 |
int swapped=0; \
|
slouken@1330
|
210 |
do { \
|
slouken@1330
|
211 |
while (compare(first,pivot)<0) first+=sz; \
|
slouken@1330
|
212 |
while (compare(pivot,last)<0) last-=sz; \
|
slouken@1330
|
213 |
if (first<last) { \
|
slouken@1330
|
214 |
swapper(first,last); swapped=1; \
|
slouken@1330
|
215 |
first+=sz; last-=sz; } \
|
slouken@1330
|
216 |
else if (first==last) { first+=sz; last-=sz; break; }\
|
slouken@1330
|
217 |
} while (first<=last); \
|
slouken@1330
|
218 |
if (!swapped) pop \
|
slouken@1330
|
219 |
}
|
slouken@1330
|
220 |
|
slouken@1330
|
221 |
/* and so is the pre-insertion-sort operation of putting
|
slouken@1330
|
222 |
* the smallest element into place as a sentinel.
|
slouken@1330
|
223 |
* Doing this makes the inner loop nicer. I got this
|
slouken@1330
|
224 |
* idea from the GNU implementation of qsort().
|
slouken@1330
|
225 |
*/
|
slouken@1330
|
226 |
#define PreInsertion(swapper,limit,sz) \
|
slouken@1330
|
227 |
first=base; \
|
slouken@1330
|
228 |
last=first + (nmemb>limit ? limit : nmemb-1)*sz;\
|
slouken@1330
|
229 |
while (last!=base) { \
|
slouken@1330
|
230 |
if (compare(first,last)>0) first=last; \
|
slouken@1330
|
231 |
last-=sz; } \
|
slouken@1330
|
232 |
if (first!=base) swapper(first,(char*)base);
|
slouken@1330
|
233 |
|
slouken@1330
|
234 |
/* and so is the insertion sort, in the first two cases: */
|
slouken@1330
|
235 |
#define Insertion(swapper) \
|
slouken@1330
|
236 |
last=((char*)base)+nmemb*size; \
|
slouken@1330
|
237 |
for (first=((char*)base)+size;first!=last;first+=size) { \
|
slouken@1330
|
238 |
char *test; \
|
slouken@1330
|
239 |
/* Find the right place for |first|. \
|
slouken@1330
|
240 |
* My apologies for var reuse. */ \
|
slouken@1330
|
241 |
for (test=first-size;compare(test,first)>0;test-=size) ; \
|
slouken@1330
|
242 |
test+=size; \
|
slouken@1330
|
243 |
if (test!=first) { \
|
slouken@1330
|
244 |
/* Shift everything in [test,first) \
|
slouken@1330
|
245 |
* up by one, and place |first| \
|
slouken@1330
|
246 |
* where |test| is. */ \
|
slouken@1337
|
247 |
memcpy(pivot,first,size); \
|
slouken@1337
|
248 |
memmove(test+size,test,first-test); \
|
slouken@1337
|
249 |
memcpy(test,pivot,size); \
|
slouken@1330
|
250 |
} \
|
slouken@1330
|
251 |
}
|
slouken@1330
|
252 |
|
slouken@1330
|
253 |
#define SWAP_nonaligned(a,b) { \
|
slouken@1330
|
254 |
register char *aa=(a),*bb=(b); \
|
slouken@1330
|
255 |
register size_t sz=size; \
|
slouken@1330
|
256 |
do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
|
slouken@1330
|
257 |
|
slouken@1330
|
258 |
#define SWAP_aligned(a,b) { \
|
slouken@1330
|
259 |
register int *aa=(int*)(a),*bb=(int*)(b); \
|
slouken@1330
|
260 |
register size_t sz=size; \
|
slouken@1330
|
261 |
do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
|
slouken@1330
|
262 |
|
slouken@1330
|
263 |
#define SWAP_words(a,b) { \
|
slouken@1330
|
264 |
register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
|
slouken@1330
|
265 |
|
slouken@1330
|
266 |
/* ---------------------------------------------------------------------- */
|
slouken@1330
|
267 |
|
slouken@1895
|
268 |
static char *
|
slouken@1895
|
269 |
pivot_big(char *first, char *mid, char *last, size_t size,
|
slouken@1895
|
270 |
int compare(const void *, const void *))
|
slouken@1895
|
271 |
{
|
slouken@1895
|
272 |
size_t d = (((last - first) / size) >> 3) * size;
|
slouken@1895
|
273 |
char *m1, *m2, *m3;
|
slouken@1895
|
274 |
{
|
slouken@1895
|
275 |
char *a = first, *b = first + d, *c = first + 2 * d;
|
slouken@1330
|
276 |
#ifdef DEBUG_QSORT
|
slouken@1895
|
277 |
fprintf(stderr, "< %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
|
slouken@1330
|
278 |
#endif
|
slouken@1895
|
279 |
m1 = compare(a, b) < 0 ?
|
slouken@1895
|
280 |
(compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
|
slouken@1895
|
281 |
: (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
|
slouken@1895
|
282 |
}
|
slouken@1895
|
283 |
{
|
slouken@1895
|
284 |
char *a = mid - d, *b = mid, *c = mid + d;
|
slouken@1330
|
285 |
#ifdef DEBUG_QSORT
|
slouken@1895
|
286 |
fprintf(stderr, ". %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
|
slouken@1330
|
287 |
#endif
|
slouken@1895
|
288 |
m2 = compare(a, b) < 0 ?
|
slouken@1895
|
289 |
(compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
|
slouken@1895
|
290 |
: (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
|
slouken@1895
|
291 |
}
|
slouken@1895
|
292 |
{
|
slouken@1895
|
293 |
char *a = last - 2 * d, *b = last - d, *c = last;
|
slouken@1330
|
294 |
#ifdef DEBUG_QSORT
|
slouken@1895
|
295 |
fprintf(stderr, "> %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
|
slouken@1330
|
296 |
#endif
|
slouken@1895
|
297 |
m3 = compare(a, b) < 0 ?
|
slouken@1895
|
298 |
(compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
|
slouken@1895
|
299 |
: (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
|
slouken@1895
|
300 |
}
|
slouken@1330
|
301 |
#ifdef DEBUG_QSORT
|
slouken@1895
|
302 |
fprintf(stderr, "-> %d %d %d\n", *(int *) m1, *(int *) m2, *(int *) m3);
|
slouken@1330
|
303 |
#endif
|
slouken@1895
|
304 |
return compare(m1, m2) < 0 ?
|
slouken@1895
|
305 |
(compare(m2, m3) < 0 ? m2 : (compare(m1, m3) < 0 ? m3 : m1))
|
slouken@1895
|
306 |
: (compare(m1, m3) < 0 ? m1 : (compare(m2, m3) < 0 ? m3 : m2));
|
slouken@1330
|
307 |
}
|
slouken@1330
|
308 |
|
slouken@1330
|
309 |
/* ---------------------------------------------------------------------- */
|
slouken@1330
|
310 |
|
slouken@1895
|
311 |
static void
|
slouken@1895
|
312 |
qsort_nonaligned(void *base, size_t nmemb, size_t size,
|
slouken@1895
|
313 |
int (*compare) (const void *, const void *))
|
slouken@1895
|
314 |
{
|
slouken@1330
|
315 |
|
slouken@1895
|
316 |
stack_entry stack[STACK_SIZE];
|
slouken@1895
|
317 |
int stacktop = 0;
|
slouken@1895
|
318 |
char *first, *last;
|
slouken@1895
|
319 |
char *pivot = malloc(size);
|
slouken@1895
|
320 |
size_t trunc = TRUNC_nonaligned * size;
|
slouken@1895
|
321 |
assert(pivot != 0);
|
slouken@1330
|
322 |
|
slouken@1895
|
323 |
first = (char *) base;
|
slouken@1895
|
324 |
last = first + (nmemb - 1) * size;
|
slouken@1330
|
325 |
|
slouken@1895
|
326 |
if ((size_t) (last - first) > trunc) {
|
slouken@1895
|
327 |
char *ffirst = first, *llast = last;
|
slouken@1895
|
328 |
while (1) {
|
slouken@1895
|
329 |
/* Select pivot */
|
slouken@1895
|
330 |
{
|
slouken@1895
|
331 |
char *mid = first + size * ((last - first) / size >> 1);
|
slouken@1895
|
332 |
Pivot(SWAP_nonaligned, size);
|
slouken@1895
|
333 |
memcpy(pivot, mid, size);
|
slouken@1895
|
334 |
}
|
slouken@1895
|
335 |
/* Partition. */
|
slouken@1895
|
336 |
Partition(SWAP_nonaligned, size);
|
slouken@1895
|
337 |
/* Prepare to recurse/iterate. */
|
slouken@1895
|
338 |
Recurse(trunc)}
|
slouken@1330
|
339 |
}
|
slouken@1895
|
340 |
PreInsertion(SWAP_nonaligned, TRUNC_nonaligned, size);
|
slouken@1895
|
341 |
Insertion(SWAP_nonaligned);
|
slouken@1895
|
342 |
free(pivot);
|
slouken@1330
|
343 |
}
|
slouken@1330
|
344 |
|
slouken@1895
|
345 |
static void
|
slouken@1895
|
346 |
qsort_aligned(void *base, size_t nmemb, size_t size,
|
slouken@1895
|
347 |
int (*compare) (const void *, const void *))
|
slouken@1895
|
348 |
{
|
slouken@1330
|
349 |
|
slouken@1895
|
350 |
stack_entry stack[STACK_SIZE];
|
slouken@1895
|
351 |
int stacktop = 0;
|
slouken@1895
|
352 |
char *first, *last;
|
slouken@1895
|
353 |
char *pivot = malloc(size);
|
slouken@1895
|
354 |
size_t trunc = TRUNC_aligned * size;
|
slouken@1895
|
355 |
assert(pivot != 0);
|
slouken@1330
|
356 |
|
slouken@1895
|
357 |
first = (char *) base;
|
slouken@1895
|
358 |
last = first + (nmemb - 1) * size;
|
slouken@1330
|
359 |
|
slouken@1895
|
360 |
if ((size_t) (last - first) > trunc) {
|
slouken@1895
|
361 |
char *ffirst = first, *llast = last;
|
slouken@1895
|
362 |
while (1) {
|
slouken@1895
|
363 |
/* Select pivot */
|
slouken@1895
|
364 |
{
|
slouken@1895
|
365 |
char *mid = first + size * ((last - first) / size >> 1);
|
slouken@1895
|
366 |
Pivot(SWAP_aligned, size);
|
slouken@1895
|
367 |
memcpy(pivot, mid, size);
|
slouken@1895
|
368 |
}
|
slouken@1895
|
369 |
/* Partition. */
|
slouken@1895
|
370 |
Partition(SWAP_aligned, size);
|
slouken@1895
|
371 |
/* Prepare to recurse/iterate. */
|
slouken@1895
|
372 |
Recurse(trunc)}
|
slouken@1330
|
373 |
}
|
slouken@1895
|
374 |
PreInsertion(SWAP_aligned, TRUNC_aligned, size);
|
slouken@1895
|
375 |
Insertion(SWAP_aligned);
|
slouken@1895
|
376 |
free(pivot);
|
slouken@1330
|
377 |
}
|
slouken@1330
|
378 |
|
slouken@1895
|
379 |
static void
|
slouken@1895
|
380 |
qsort_words(void *base, size_t nmemb,
|
slouken@1895
|
381 |
int (*compare) (const void *, const void *))
|
slouken@1895
|
382 |
{
|
slouken@1330
|
383 |
|
slouken@1895
|
384 |
stack_entry stack[STACK_SIZE];
|
slouken@1895
|
385 |
int stacktop = 0;
|
slouken@1895
|
386 |
char *first, *last;
|
slouken@1895
|
387 |
char *pivot = malloc(WORD_BYTES);
|
slouken@1895
|
388 |
assert(pivot != 0);
|
slouken@1330
|
389 |
|
slouken@1895
|
390 |
first = (char *) base;
|
slouken@1895
|
391 |
last = first + (nmemb - 1) * WORD_BYTES;
|
slouken@1330
|
392 |
|
slouken@1895
|
393 |
if (last - first > TRUNC_words) {
|
slouken@1895
|
394 |
char *ffirst = first, *llast = last;
|
slouken@1895
|
395 |
while (1) {
|
slouken@1330
|
396 |
#ifdef DEBUG_QSORT
|
slouken@1895
|
397 |
fprintf(stderr, "Doing %d:%d: ",
|
slouken@1895
|
398 |
(first - (char *) base) / WORD_BYTES,
|
slouken@1895
|
399 |
(last - (char *) base) / WORD_BYTES);
|
slouken@1330
|
400 |
#endif
|
slouken@1895
|
401 |
/* Select pivot */
|
slouken@1895
|
402 |
{
|
slouken@1895
|
403 |
char *mid =
|
slouken@1895
|
404 |
first + WORD_BYTES * ((last - first) / (2 * WORD_BYTES));
|
slouken@1895
|
405 |
Pivot(SWAP_words, WORD_BYTES);
|
slouken@1895
|
406 |
*(int *) pivot = *(int *) mid;
|
slouken@1895
|
407 |
}
|
slouken@1330
|
408 |
#ifdef DEBUG_QSORT
|
slouken@1895
|
409 |
fprintf(stderr, "pivot=%d\n", *(int *) pivot);
|
slouken@1330
|
410 |
#endif
|
slouken@1895
|
411 |
/* Partition. */
|
slouken@1895
|
412 |
Partition(SWAP_words, WORD_BYTES);
|
slouken@1895
|
413 |
/* Prepare to recurse/iterate. */
|
slouken@1895
|
414 |
Recurse(TRUNC_words)}
|
slouken@1330
|
415 |
}
|
slouken@1895
|
416 |
PreInsertion(SWAP_words, (TRUNC_words / WORD_BYTES), WORD_BYTES);
|
slouken@1895
|
417 |
/* Now do insertion sort. */
|
slouken@1895
|
418 |
last = ((char *) base) + nmemb * WORD_BYTES;
|
slouken@1895
|
419 |
for (first = ((char *) base) + WORD_BYTES; first != last;
|
slouken@1895
|
420 |
first += WORD_BYTES) {
|
slouken@1895
|
421 |
/* Find the right place for |first|. My apologies for var reuse */
|
slouken@1895
|
422 |
int *pl = (int *) (first - WORD_BYTES), *pr = (int *) first;
|
slouken@1895
|
423 |
*(int *) pivot = *(int *) first;
|
slouken@1895
|
424 |
for (; compare(pl, pivot) > 0; pr = pl, --pl) {
|
slouken@1895
|
425 |
*pr = *pl;
|
slouken@1895
|
426 |
}
|
slouken@1895
|
427 |
if (pr != (int *) first)
|
slouken@1895
|
428 |
*pr = *(int *) pivot;
|
slouken@1895
|
429 |
}
|
slouken@1895
|
430 |
free(pivot);
|
slouken@1330
|
431 |
}
|
slouken@1330
|
432 |
|
slouken@1330
|
433 |
/* ---------------------------------------------------------------------- */
|
slouken@1330
|
434 |
|
slouken@1895
|
435 |
void
|
slouken@1895
|
436 |
qsort(void *base, size_t nmemb, size_t size,
|
slouken@1895
|
437 |
int (*compare) (const void *, const void *))
|
slouken@1895
|
438 |
{
|
slouken@1330
|
439 |
|
slouken@1895
|
440 |
if (nmemb <= 1)
|
slouken@1895
|
441 |
return;
|
slouken@1895
|
442 |
if (((uintptr_t) base | size) & (WORD_BYTES - 1))
|
slouken@1895
|
443 |
qsort_nonaligned(base, nmemb, size, compare);
|
slouken@1895
|
444 |
else if (size != WORD_BYTES)
|
slouken@1895
|
445 |
qsort_aligned(base, nmemb, size, compare);
|
slouken@1895
|
446 |
else
|
slouken@1895
|
447 |
qsort_words(base, nmemb, compare);
|
slouken@1330
|
448 |
}
|
slouken@1330
|
449 |
|
slouken@1331
|
450 |
#endif /* !HAVE_QSORT */
|
slouken@1895
|
451 |
/* vi: set ts=4 sw=4 expandtab: */
|