src/stdlib/SDL_iconv.c
author Sam Lantinga
Thu, 28 Jun 2007 06:53:09 +0000
branch SDL-1.2
changeset 3986 4f73308bbb32
parent 3985 2f8efcf14c83
child 3987 00486a9c2893
permissions -rw-r--r--
Whoops, need to actually copy inbuf
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 /* This file contains portable iconv functions for SDL */
    25 
    26 #include "SDL_stdinc.h"
    27 #include "SDL_endian.h"
    28 
    29 #ifdef HAVE_ICONV
    30 
    31 #include <errno.h>
    32 
    33 size_t SDL_iconv(SDL_iconv_t cd,
    34                  const char **inbuf, size_t *inbytesleft,
    35                  char **outbuf, size_t *outbytesleft)
    36 {
    37 	size_t retCode;
    38 #ifdef ICONV_REALLY_MODIFIES_INBUF
    39 	if ( inbuf && *inbuf && inbytesleft ) {
    40 		char *tmp = SDL_stack_alloc(char, *inbytesleft);
    41 		char *ptr = tmp;
    42 		SDL_memcpy(tmp, inbuf, *inbytesleft);
    43 		retCode = iconv(cd, &ptr, inbytesleft, outbuf, outbytesleft);
    44 		inbuf += (ptr - tmp);
    45 		SDL_stack_free(tmp);
    46 	} else {
    47 		retCode = iconv(cd, NULL, inbytesleft, outbuf, outbytesleft);
    48 	}
    49 #else
    50 	retCode = iconv(cd, (char **)inbuf, inbytesleft, outbuf, outbytesleft);
    51 #endif
    52 	if ( retCode == (size_t)-1 ) {
    53 		switch(errno) {
    54 		    case E2BIG:
    55 			return SDL_ICONV_E2BIG;
    56 		    case EILSEQ:
    57 			return SDL_ICONV_EILSEQ;
    58 		    case EINVAL:
    59 			return SDL_ICONV_EINVAL;
    60 		    default:
    61 			return SDL_ICONV_ERROR;
    62 		}
    63 	}
    64 	return retCode;
    65 }
    66 
    67 #else
    68 
    69 /* Lots of useful information on Unicode at:
    70 	http://www.cl.cam.ac.uk/~mgk25/unicode.html
    71 */
    72 
/* Code point of the byte order marker written ahead of UTF-16/UTF-32 output */
#define UNICODE_BOM	0xFEFF

/* Substitutes stored when a character is malformed or can't be represented
   in the destination encoding (8-bit targets use the ASCII one) */
#define UNKNOWN_ASCII	'?'
#define UNKNOWN_UNICODE	0xFFFD

/* Encodings understood by the built-in converter.
   ENCODING_UNKNOWN marks "no match" during name lookup. */
enum {
	ENCODING_UNKNOWN,
	ENCODING_ASCII,
	ENCODING_LATIN1,
	ENCODING_UTF8,
	ENCODING_UTF16,		/* Needs byte order marker */
	ENCODING_UTF16BE,
	ENCODING_UTF16LE,
	ENCODING_UTF32,		/* Needs byte order marker */
	ENCODING_UTF32BE,
	ENCODING_UTF32LE,
	ENCODING_UCS2,		/* Native byte order assumed */
	ENCODING_UCS4,		/* Native byte order assumed */
};
/* Aliases for the UTF-16/UTF-32 variant that matches the host byte order */
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
#define ENCODING_UTF16NATIVE	ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE	ENCODING_UTF32BE
#else
#define ENCODING_UTF16NATIVE	ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE	ENCODING_UTF32LE
#endif
    99 
/* Conversion descriptor for the built-in converter: just the pair of
   ENCODING_* values chosen by SDL_iconv_open().  SDL_iconv() rewrites a
   generic UTF16/UTF32 entry to a concrete byte order on first use. */
struct _SDL_iconv_t
{
	int src_fmt;	/* ENCODING_* of the input */
	int dst_fmt;	/* ENCODING_* of the output */
};
   105 
   106 static struct {
   107 	const char *name;
   108 	int format;
   109 } encodings[] = {
   110 	{ "ASCII",	ENCODING_ASCII },
   111 	{ "US-ASCII",	ENCODING_ASCII },
   112 	{ "LATIN1",	ENCODING_LATIN1 },
   113 	{ "ISO-8859-1",	ENCODING_LATIN1 },
   114 	{ "UTF8",	ENCODING_UTF8 },
   115 	{ "UTF-8",	ENCODING_UTF8 },
   116 	{ "UTF16",	ENCODING_UTF16 },
   117 	{ "UTF-16",	ENCODING_UTF16 },
   118 	{ "UTF16BE",	ENCODING_UTF16BE },
   119 	{ "UTF-16BE",	ENCODING_UTF16BE },
   120 	{ "UTF16LE",	ENCODING_UTF16LE },
   121 	{ "UTF-16LE",	ENCODING_UTF16LE },
   122 	{ "UTF32",	ENCODING_UTF32 },
   123 	{ "UTF-32",	ENCODING_UTF32 },
   124 	{ "UTF32BE",	ENCODING_UTF32BE },
   125 	{ "UTF-32BE",	ENCODING_UTF32BE },
   126 	{ "UTF32LE",	ENCODING_UTF32LE },
   127 	{ "UTF-32LE",	ENCODING_UTF32LE },
   128 	{ "UCS2",	ENCODING_UCS2 },
   129 	{ "UCS-2",	ENCODING_UCS2 },
   130 	{ "UCS4",	ENCODING_UCS4 },
   131 	{ "UCS-4",	ENCODING_UCS4 },
   132 };
   133 
   134 SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
   135 {
   136 	int src_fmt = ENCODING_UNKNOWN;
   137 	int dst_fmt = ENCODING_UNKNOWN;
   138 	int i;
   139 
   140 	for ( i = 0; i < SDL_arraysize(encodings); ++i ) {
   141 		if ( SDL_strcasecmp(fromcode, encodings[i].name) == 0 ) {
   142 			src_fmt = encodings[i].format;
   143 			if ( dst_fmt != ENCODING_UNKNOWN ) {
   144 				break;
   145 			}
   146 		}
   147 		if ( SDL_strcasecmp(tocode, encodings[i].name) == 0 ) {
   148 			dst_fmt = encodings[i].format;
   149 			if ( src_fmt != ENCODING_UNKNOWN ) {
   150 				break;
   151 			}
   152 		}
   153 	}
   154 	if ( src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN ) {
   155 		SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd));
   156 		if ( cd ) {
   157 			cd->src_fmt = src_fmt;
   158 			cd->dst_fmt = dst_fmt;
   159 			return cd;
   160 		}
   161 	}
   162 	return (SDL_iconv_t)-1;
   163 }
   164 
   165 size_t SDL_iconv(SDL_iconv_t cd,
   166                  const char **inbuf, size_t *inbytesleft,
   167                  char **outbuf, size_t *outbytesleft)
   168 {
   169 	/* For simplicity, we'll convert everything to and from UCS-4 */
   170 	const char *src;
   171 	char *dst;
   172 	size_t srclen, dstlen;
   173 	Uint32 ch = 0;
   174 	size_t total;
   175 
   176 	if ( !inbuf || !*inbuf ) {
   177 		/* Reset the context */
   178 		return 0;
   179 	}
   180 	if ( !outbuf || !*outbuf || !outbytesleft || !*outbytesleft ) {
   181 		return SDL_ICONV_E2BIG;
   182 	}
   183 	src = *inbuf;
   184 	srclen = (inbytesleft ? *inbytesleft : 0);
   185 	dst = *outbuf;
   186 	dstlen = *outbytesleft;
   187 
   188 	switch ( cd->src_fmt ) {
   189 	    case ENCODING_UTF16:
   190 		/* Scan for a byte order marker */
   191 		{
   192 			Uint8 *p = (Uint8 *)src;
   193 			size_t n = srclen / 2;
   194 			while ( n ) {
   195 				if ( p[0] == 0xFF && p[1] == 0xFE ) {
   196 					cd->src_fmt = ENCODING_UTF16BE;
   197 					break;
   198 				} else if ( p[0] == 0xFE && p[1] == 0xFF ) {
   199 					cd->src_fmt = ENCODING_UTF16LE;
   200 					break;
   201 				}
   202 				p += 2;
   203 				--n;
   204 			}
   205 			if ( n == 0 ) {
   206 				/* We can't tell, default to host order */
   207 				cd->src_fmt = ENCODING_UTF16NATIVE;
   208 			}
   209 		}
   210 		break;
   211 	    case ENCODING_UTF32:
   212 		/* Scan for a byte order marker */
   213 		{
   214 			Uint8 *p = (Uint8 *)src;
   215 			size_t n = srclen / 4;
   216 			while ( n ) {
   217 				if ( p[0] == 0xFF && p[1] == 0xFE &&
   218 				     p[2] == 0x00 && p[3] == 0x00 ) {
   219 					cd->src_fmt = ENCODING_UTF32BE;
   220 					break;
   221 				} else if ( p[0] == 0x00 && p[1] == 0x00 &&
   222 				            p[2] == 0xFE && p[3] == 0xFF ) {
   223 					cd->src_fmt = ENCODING_UTF32LE;
   224 					break;
   225 				}
   226 				p += 4;
   227 				--n;
   228 			}
   229 			if ( n == 0 ) {
   230 				/* We can't tell, default to host order */
   231 				cd->src_fmt = ENCODING_UTF32NATIVE;
   232 			}
   233 		}
   234 		break;
   235 	}
   236 
   237 	switch ( cd->dst_fmt ) {
   238 	    case ENCODING_UTF16:
   239 		/* Default to host order, need to add byte order marker */
   240 		if ( dstlen < 2 ) {
   241 			return SDL_ICONV_E2BIG;
   242 		}
   243 		*(Uint16 *)dst = UNICODE_BOM;
   244 		dst += 2;
   245 		dstlen -= 2;
   246 		cd->dst_fmt = ENCODING_UTF16NATIVE;
   247 		break;
   248 	    case ENCODING_UTF32:
   249 		/* Default to host order, need to add byte order marker */
   250 		if ( dstlen < 4 ) {
   251 			return SDL_ICONV_E2BIG;
   252 		}
   253 		*(Uint32 *)dst = UNICODE_BOM;
   254 		dst += 4;
   255 		dstlen -= 4;
   256 		cd->dst_fmt = ENCODING_UTF32NATIVE;
   257 		break;
   258 	}
   259 
   260 	total = 0;
   261 	while ( srclen > 0 ) {
   262 		/* Decode a character */
   263 		switch ( cd->src_fmt ) {
   264 		    case ENCODING_ASCII:
   265 			{
   266 				Uint8 *p = (Uint8 *)src;
   267 				ch = (Uint32)(p[0] & 0x7F);
   268 				++src;
   269 				--srclen;
   270 			}
   271 			break;
   272 		    case ENCODING_LATIN1:
   273 			{
   274 				Uint8 *p = (Uint8 *)src;
   275 				ch = (Uint32)p[0];
   276 				++src;
   277 				--srclen;
   278 			}
   279 			break;
   280 		    case ENCODING_UTF8: /* RFC 3629 */
   281 			{
   282 				Uint8 *p = (Uint8 *)src;
   283 				size_t left = 0;
   284 				SDL_bool overlong = SDL_FALSE;
   285 				if ( p[0] >= 0xFC ) {
   286 					if ( (p[0] & 0xFE) != 0xFC ) {
   287 						/* Skip illegal sequences
   288 						return SDL_ICONV_EILSEQ;
   289 						*/
   290 						ch = UNKNOWN_UNICODE;
   291 					} else {
   292 						if ( p[0] == 0xFC ) {
   293 							overlong = SDL_TRUE;
   294 						}
   295 						ch = (Uint32)(p[0] & 0x01);
   296 						left = 5;
   297 					}
   298 				} else if ( p[0] >= 0xF8 ) {
   299 					if ( (p[0] & 0xFC) != 0xF8 ) {
   300 						/* Skip illegal sequences
   301 						return SDL_ICONV_EILSEQ;
   302 						*/
   303 						ch = UNKNOWN_UNICODE;
   304 					} else {
   305 						if ( p[0] == 0xF8 ) {
   306 							overlong = SDL_TRUE;
   307 						}
   308 						ch = (Uint32)(p[0] & 0x03);
   309 						left = 4;
   310 					}
   311 				} else if ( p[0] >= 0xF0 ) {
   312 					if ( (p[0] & 0xF8) != 0xF0 ) {
   313 						/* Skip illegal sequences
   314 						return SDL_ICONV_EILSEQ;
   315 						*/
   316 						ch = UNKNOWN_UNICODE;
   317 					} else {
   318 						if ( p[0] == 0xF0 ) {
   319 							overlong = SDL_TRUE;
   320 						}
   321 						ch = (Uint32)(p[0] & 0x07);
   322 						left = 3;
   323 					}
   324 				} else if ( p[0] >= 0xE0 ) {
   325 					if ( (p[0] & 0xF0) != 0xE0 ) {
   326 						/* Skip illegal sequences
   327 						return SDL_ICONV_EILSEQ;
   328 						*/
   329 						ch = UNKNOWN_UNICODE;
   330 					} else {
   331 						if ( p[0] == 0xE0 ) {
   332 							overlong = SDL_TRUE;
   333 						}
   334 						ch = (Uint32)(p[0] & 0x0F);
   335 						left = 2;
   336 					}
   337 				} else if ( p[0] >= 0xC0 ) {
   338 					if ( (p[0] & 0xE0) != 0xC0 ) {
   339 						/* Skip illegal sequences
   340 						return SDL_ICONV_EILSEQ;
   341 						*/
   342 						ch = UNKNOWN_UNICODE;
   343 					} else {
   344 						if ( (p[0] & 0xCE) == 0xC0 ) {
   345 							overlong = SDL_TRUE;
   346 						}
   347 						ch = (Uint32)(p[0] & 0x1F);
   348 						left = 1;
   349 					}
   350 				} else {
   351 					if ( (p[0] & 0x80) != 0x00 ) {
   352 						/* Skip illegal sequences
   353 						return SDL_ICONV_EILSEQ;
   354 						*/
   355 						ch = UNKNOWN_UNICODE;
   356 					} else {
   357 						ch = (Uint32)p[0];
   358 					}
   359 				}
   360 				++src;
   361 				--srclen;
   362 				if ( srclen < left ) {
   363 					return SDL_ICONV_EINVAL;
   364 				}
   365 				while ( left-- ) {
   366 					++p;
   367 					if ( (p[0] & 0xC0) != 0x80 ) {
   368 						/* Skip illegal sequences
   369 						return SDL_ICONV_EILSEQ;
   370 						*/
   371 						ch = UNKNOWN_UNICODE;
   372 						break;
   373 					}
   374 					ch <<= 6;
   375 					ch |= (p[0] & 0x3F);
   376 					++src;
   377 					--srclen;
   378 				}
   379 				if ( overlong ) {
   380 					/* Potential security risk
   381 					return SDL_ICONV_EILSEQ;
   382 					*/
   383 					ch = UNKNOWN_UNICODE;
   384 				}
   385 				if ( (ch >= 0xD800 && ch <= 0xDFFF) ||
   386 				     (ch == 0xFFFE || ch == 0xFFFF) ||
   387 				     ch > 0x10FFFF ) {
   388 					/* Skip illegal sequences
   389 					return SDL_ICONV_EILSEQ;
   390 					*/
   391 					ch = UNKNOWN_UNICODE;
   392 				}
   393 			}
   394 			break;
   395 		    case ENCODING_UTF16BE: /* RFC 2781 */
   396 			{
   397 				Uint8 *p = (Uint8 *)src;
   398 				Uint16 W1, W2;
   399 				if ( srclen < 2 ) {
   400 					return SDL_ICONV_EINVAL;
   401 				}
   402 				W1 = ((Uint16)p[0] << 8) |
   403 				      (Uint16)p[1];
   404 				src += 2;
   405 				srclen -= 2;
   406 				if ( W1 < 0xD800 || W1 > 0xDFFF ) {
   407 					ch = (Uint32)W1;
   408 					break;
   409 				}
   410 				if ( W1 > 0xDBFF ) {
   411 					/* Skip illegal sequences
   412 					return SDL_ICONV_EILSEQ;
   413 					*/
   414 					ch = UNKNOWN_UNICODE;
   415 					break;
   416 				}
   417 				if ( srclen < 2 ) {
   418 					return SDL_ICONV_EINVAL;
   419 				}
   420 				p = (Uint8 *)src;
   421 				W2 = ((Uint16)p[0] << 8) |
   422 				      (Uint16)p[1];
   423 				src += 2;
   424 				srclen -= 2;
   425 				if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
   426 					/* Skip illegal sequences
   427 					return SDL_ICONV_EILSEQ;
   428 					*/
   429 					ch = UNKNOWN_UNICODE;
   430 					break;
   431 				}
   432 				ch = (((Uint32)(W1 & 0x3FF) << 10) |
   433 				      (Uint32)(W2 & 0x3FF)) + 0x10000;
   434 			}
   435 			break;
   436 		    case ENCODING_UTF16LE: /* RFC 2781 */
   437 			{
   438 				Uint8 *p = (Uint8 *)src;
   439 				Uint16 W1, W2;
   440 				if ( srclen < 2 ) {
   441 					return SDL_ICONV_EINVAL;
   442 				}
   443 				W1 = ((Uint16)p[1] << 8) |
   444 				      (Uint16)p[0];
   445 				src += 2;
   446 				srclen -= 2;
   447 				if ( W1 < 0xD800 || W1 > 0xDFFF ) {
   448 					ch = (Uint32)W1;
   449 					break;
   450 				}
   451 				if ( W1 > 0xDBFF ) {
   452 					/* Skip illegal sequences
   453 					return SDL_ICONV_EILSEQ;
   454 					*/
   455 					ch = UNKNOWN_UNICODE;
   456 					break;
   457 				}
   458 				if ( srclen < 2 ) {
   459 					return SDL_ICONV_EINVAL;
   460 				}
   461 				p = (Uint8 *)src;
   462 				W2 = ((Uint16)p[1] << 8) |
   463 				      (Uint16)p[0];
   464 				src += 2;
   465 				srclen -= 2;
   466 				if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
   467 					/* Skip illegal sequences
   468 					return SDL_ICONV_EILSEQ;
   469 					*/
   470 					ch = UNKNOWN_UNICODE;
   471 					break;
   472 				}
   473 				ch = (((Uint32)(W1 & 0x3FF) << 10) |
   474 				      (Uint32)(W2 & 0x3FF)) + 0x10000;
   475 			}
   476 			break;
   477 		    case ENCODING_UTF32BE:
   478 			{
   479 				Uint8 *p = (Uint8 *)src;
   480 				if ( srclen < 4 ) {
   481 					return SDL_ICONV_EINVAL;
   482 				}
   483 				ch = ((Uint32)p[0] << 24) |
   484 				     ((Uint32)p[1] << 16) |
   485 				     ((Uint32)p[2] << 8) |
   486 				      (Uint32)p[3];
   487 				src += 4;
   488 				srclen -= 4;
   489 			}
   490 			break;
   491 		    case ENCODING_UTF32LE:
   492 			{
   493 				Uint8 *p = (Uint8 *)src;
   494 				if ( srclen < 4 ) {
   495 					return SDL_ICONV_EINVAL;
   496 				}
   497 				ch = ((Uint32)p[3] << 24) |
   498 				     ((Uint32)p[2] << 16) |
   499 				     ((Uint32)p[1] << 8) |
   500 				      (Uint32)p[0];
   501 				src += 4;
   502 				srclen -= 4;
   503 			}
   504 			break;
   505 		    case ENCODING_UCS2:
   506 			{
   507 				Uint16 *p = (Uint16 *)src;
   508 				if ( srclen < 2 ) {
   509 					return SDL_ICONV_EINVAL;
   510 				}
   511 				ch = *p;
   512 				src += 2;
   513 				srclen -= 2;
   514 			}
   515 			break;
   516 		    case ENCODING_UCS4:
   517 			{
   518 				Uint32 *p = (Uint32 *)src;
   519 				if ( srclen < 4 ) {
   520 					return SDL_ICONV_EINVAL;
   521 				}
   522 				ch = *p;
   523 				src += 4;
   524 				srclen -= 4;
   525 			}
   526 			break;
   527 		}
   528 
   529 		/* Encode a character */
   530 		switch ( cd->dst_fmt ) {
   531 		    case ENCODING_ASCII:
   532 			{
   533 				Uint8 *p = (Uint8 *)dst;
   534 				if ( dstlen < 1 ) {
   535 					return SDL_ICONV_E2BIG;
   536 				}
   537 				if ( ch > 0x7F ) {
   538 					*p = UNKNOWN_ASCII;
   539 				} else {
   540 					*p = (Uint8)ch;
   541 				}
   542 				++dst;
   543 				--dstlen;
   544 			}
   545 			break;
   546 		    case ENCODING_LATIN1:
   547 			{
   548 				Uint8 *p = (Uint8 *)dst;
   549 				if ( dstlen < 1 ) {
   550 					return SDL_ICONV_E2BIG;
   551 				}
   552 				if ( ch > 0xFF ) {
   553 					*p = UNKNOWN_ASCII;
   554 				} else {
   555 					*p = (Uint8)ch;
   556 				}
   557 				++dst;
   558 				--dstlen;
   559 			}
   560 			break;
   561 		    case ENCODING_UTF8: /* RFC 3629 */
   562 			{
   563 				Uint8 *p = (Uint8 *)dst;
   564 				if ( ch > 0x10FFFF ) {
   565 					ch = UNKNOWN_UNICODE;
   566 				}
   567 				if ( ch <= 0x7F ) {
   568 					if ( dstlen < 1 ) {
   569 						return SDL_ICONV_E2BIG;
   570 					}
   571 					*p = (Uint8)ch;
   572 					++dst;
   573 					--dstlen;
   574 				} else if ( ch <= 0x7FF ) {
   575 					if ( dstlen < 2 ) {
   576 						return SDL_ICONV_E2BIG;
   577 					}
   578 					p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F);
   579 					p[1] = 0x80 | (Uint8)(ch & 0x3F);
   580 					dst += 2;
   581 					dstlen -= 2;
   582 				} else if ( ch <= 0xFFFF ) {
   583 					if ( dstlen < 3 ) {
   584 						return SDL_ICONV_E2BIG;
   585 					}
   586 					p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F);
   587 					p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
   588 					p[2] = 0x80 | (Uint8)(ch & 0x3F);
   589 					dst += 3;
   590 					dstlen -= 3;
   591 				} else if ( ch <= 0x1FFFFF ) {
   592 					if ( dstlen < 4 ) {
   593 						return SDL_ICONV_E2BIG;
   594 					}
   595 					p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07);
   596 					p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
   597 					p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
   598 					p[3] = 0x80 | (Uint8)(ch & 0x3F);
   599 					dst += 4;
   600 					dstlen -= 4;
   601 				} else if ( ch <= 0x3FFFFFF ) {
   602 					if ( dstlen < 5 ) {
   603 						return SDL_ICONV_E2BIG;
   604 					}
   605 					p[0] = 0xF8 | (Uint8)((ch >> 24) & 0x03);
   606 					p[1] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
   607 					p[2] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
   608 					p[3] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
   609 					p[4] = 0x80 | (Uint8)(ch & 0x3F);
   610 					dst += 5;
   611 					dstlen -= 5;
   612 				} else {
   613 					if ( dstlen < 6 ) {
   614 						return SDL_ICONV_E2BIG;
   615 					}
   616 					p[0] = 0xFC | (Uint8)((ch >> 30) & 0x01);
   617 					p[1] = 0x80 | (Uint8)((ch >> 24) & 0x3F);
   618 					p[2] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
   619 					p[3] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
   620 					p[4] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
   621 					p[5] = 0x80 | (Uint8)(ch & 0x3F);
   622 					dst += 6;
   623 					dstlen -= 6;
   624 				}
   625 			}
   626 			break;
   627 		    case ENCODING_UTF16BE: /* RFC 2781 */
   628 			{
   629 				Uint8 *p = (Uint8 *)dst;
   630 				if ( ch > 0x10FFFF ) {
   631 					ch = UNKNOWN_UNICODE;
   632 				}
   633 				if ( ch < 0x10000 ) {
   634 					if ( dstlen < 2 ) {
   635 						return SDL_ICONV_E2BIG;
   636 					}
   637 					p[0] = (Uint8)(ch >> 8);
   638 					p[1] = (Uint8)ch;
   639 					dst += 2;
   640 					dstlen -= 2;
   641 				} else {
   642 					Uint16 W1, W2;
   643 					if ( dstlen < 4 ) {
   644 						return SDL_ICONV_E2BIG;
   645 					}
   646 					ch = ch - 0x10000;
   647 					W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
   648 					W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
   649 					p[0] = (Uint8)(W1 >> 8);
   650 					p[1] = (Uint8)W1;
   651 					p[2] = (Uint8)(W2 >> 8);
   652 					p[3] = (Uint8)W2;
   653 					dst += 4;
   654 					dstlen -= 4;
   655 				}
   656 			}
   657 			break;
   658 		    case ENCODING_UTF16LE: /* RFC 2781 */
   659 			{
   660 				Uint8 *p = (Uint8 *)dst;
   661 				if ( ch > 0x10FFFF ) {
   662 					ch = UNKNOWN_UNICODE;
   663 				}
   664 				if ( ch < 0x10000 ) {
   665 					if ( dstlen < 2 ) {
   666 						return SDL_ICONV_E2BIG;
   667 					}
   668 					p[1] = (Uint8)(ch >> 8);
   669 					p[0] = (Uint8)ch;
   670 					dst += 2;
   671 					dstlen -= 2;
   672 				} else {
   673 					Uint16 W1, W2;
   674 					if ( dstlen < 4 ) {
   675 						return SDL_ICONV_E2BIG;
   676 					}
   677 					ch = ch - 0x10000;
   678 					W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
   679 					W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
   680 					p[1] = (Uint8)(W1 >> 8);
   681 					p[0] = (Uint8)W1;
   682 					p[3] = (Uint8)(W2 >> 8);
   683 					p[2] = (Uint8)W2;
   684 					dst += 4;
   685 					dstlen -= 4;
   686 				}
   687 			}
   688 			break;
   689 		    case ENCODING_UTF32BE:
   690 			{
   691 				Uint8 *p = (Uint8 *)dst;
   692 				if ( ch > 0x10FFFF ) {
   693 					ch = UNKNOWN_UNICODE;
   694 				}
   695 				if ( dstlen < 4 ) {
   696 					return SDL_ICONV_E2BIG;
   697 				}
   698 				p[0] = (Uint8)(ch >> 24);
   699 				p[1] = (Uint8)(ch >> 16);
   700 				p[2] = (Uint8)(ch >> 8);
   701 				p[3] = (Uint8)ch;
   702 				dst += 4;
   703 				dstlen -= 4;
   704 			}
   705 			break;
   706 		    case ENCODING_UTF32LE:
   707 			{
   708 				Uint8 *p = (Uint8 *)dst;
   709 				if ( ch > 0x10FFFF ) {
   710 					ch = UNKNOWN_UNICODE;
   711 				}
   712 				if ( dstlen < 4 ) {
   713 					return SDL_ICONV_E2BIG;
   714 				}
   715 				p[3] = (Uint8)(ch >> 24);
   716 				p[2] = (Uint8)(ch >> 16);
   717 				p[1] = (Uint8)(ch >> 8);
   718 				p[0] = (Uint8)ch;
   719 				dst += 4;
   720 				dstlen -= 4;
   721 			}
   722 			break;
   723 		    case ENCODING_UCS2:
   724 			{
   725 				Uint16 *p = (Uint16 *)dst;
   726 				if ( ch > 0xFFFF ) {
   727 					ch = UNKNOWN_UNICODE;
   728 				}
   729 				if ( dstlen < 2 ) {
   730 					return SDL_ICONV_E2BIG;
   731 				}
   732 				*p = (Uint16)ch;
   733 				dst += 2;
   734 				dstlen -= 2;
   735 			}
   736 			break;
   737 		    case ENCODING_UCS4:
   738 			{
   739 				Uint32 *p = (Uint32 *)dst;
   740 				if ( ch > 0x7FFFFFFF ) {
   741 					ch = UNKNOWN_UNICODE;
   742 				}
   743 				if ( dstlen < 4 ) {
   744 					return SDL_ICONV_E2BIG;
   745 				}
   746 				*p = ch;
   747 				dst += 4;
   748 				dstlen -= 4;
   749 			}
   750 			break;
   751 		}
   752 
   753 		/* Update state */
   754 		*inbuf = src;
   755 		*inbytesleft = srclen;
   756 		*outbuf = dst;
   757 		*outbytesleft = dstlen;
   758 		++total;
   759 	}
   760 	return total;
   761 }
   762 
   763 int SDL_iconv_close(SDL_iconv_t cd)
   764 {
   765 	if ( cd && cd != (SDL_iconv_t)-1 ) {
   766 		SDL_free(cd);
   767 	}
   768 	return 0;
   769 }
   770 
   771 #endif /* !HAVE_ICONV */
   772 
   773 char *SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft)
   774 {
   775 	SDL_iconv_t cd;
   776 	char *string;
   777 	size_t stringsize;
   778 	char *outbuf;
   779 	size_t outbytesleft;
   780 	size_t retCode = 0;
   781 
   782 	cd = SDL_iconv_open(tocode, fromcode);
   783 	if ( cd == (SDL_iconv_t)-1 ) {
   784 		return NULL;
   785 	}
   786 
   787 	stringsize = inbytesleft > 4 ? inbytesleft : 4;
   788 	string = SDL_malloc(stringsize);
   789 	if ( !string ) {
   790 		SDL_iconv_close(cd);
   791 		return NULL;
   792 	}
   793 	outbuf = string;
   794 	outbytesleft = stringsize;
   795 	SDL_memset(outbuf, 0, 4);
   796 
   797 	while ( inbytesleft > 0 ) {
   798 		retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
   799 		switch (retCode) {
   800 		    case SDL_ICONV_E2BIG:
   801 			{
   802 				char *oldstring = string;
   803 				stringsize *= 2;
   804 				string = SDL_realloc(string, stringsize);
   805 				if ( !string ) {
   806 					SDL_iconv_close(cd);
   807 					return NULL;
   808 				}
   809 				outbuf = string + (outbuf - oldstring);
   810 				outbytesleft = stringsize - (outbuf - string);
   811 				SDL_memset(outbuf, 0, 4);
   812 			}
   813 			break;
   814 		    case SDL_ICONV_EILSEQ:
   815 			/* Try skipping some input data - not perfect, but... */
   816 			++inbuf;
   817 			--inbytesleft;
   818 			break;
   819 		    case SDL_ICONV_EINVAL:
   820 		    case SDL_ICONV_ERROR:
   821 			/* We can't continue... */
   822 			inbytesleft = 0;
   823 			break;
   824 		}
   825 	}
   826 	SDL_iconv_close(cd);
   827 
   828 	return string;
   829 }