src/stdlib/SDL_iconv.c
author Sam Lantinga
Thu, 12 Jul 2007 07:47:29 +0000
branch SDL-1.2
changeset 4054 64bf737873a2
parent 4053 0aadbc81c497
child 4055 6ed1fded55ff
permissions -rw-r--r--
Fixed iconv handling on Solaris 11
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 /* This file contains portable iconv functions for SDL */
    25 
    26 #include "SDL_stdinc.h"
    27 #include "SDL_endian.h"
    28 
    29 #ifdef HAVE_ICONV
    30 
    31 /* Depending on which standard the iconv() was implemented with,
    32    iconv() may or may not use const char ** for the inbuf param.
    33    If we get this wrong, it's just a warning, so no big deal.
    34 */
    35 #if defined(_XGP6) || \
    36     defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2))
    37 #define ICONV_INBUF_NONCONST
    38 #endif
    39 
    40 #include <errno.h>
    41 
    42 size_t SDL_iconv(SDL_iconv_t cd,
    43                  const char **inbuf, size_t *inbytesleft,
    44                  char **outbuf, size_t *outbytesleft)
    45 {
    46 	size_t retCode;
    47 #ifdef ICONV_INBUF_NONCONST
    48 	retCode = iconv(cd, (char **)inbuf, inbytesleft, outbuf, outbytesleft);
    49 #else
    50 	retCode = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
    51 #endif
    52 	if ( retCode == (size_t)-1 ) {
    53 		switch(errno) {
    54 		    case E2BIG:
    55 			return SDL_ICONV_E2BIG;
    56 		    case EILSEQ:
    57 			return SDL_ICONV_EILSEQ;
    58 		    case EINVAL:
    59 			return SDL_ICONV_EINVAL;
    60 		    default:
    61 			return SDL_ICONV_ERROR;
    62 		}
    63 	}
    64 	return retCode;
    65 }
    66 
    67 #else
    68 
    69 /* Lots of useful information on Unicode at:
    70 	http://www.cl.cam.ac.uk/~mgk25/unicode.html
    71 */
    72 
    73 #define UNICODE_BOM	0xFEFF
    74 
    75 #define UNKNOWN_ASCII	'?'
    76 #define UNKNOWN_UNICODE	0xFFFD
    77 
    78 enum {
    79 	ENCODING_UNKNOWN,
    80 	ENCODING_ASCII,
    81 	ENCODING_LATIN1,
    82 	ENCODING_UTF8,
    83 	ENCODING_UTF16,		/* Needs byte order marker */
    84 	ENCODING_UTF16BE,
    85 	ENCODING_UTF16LE,
    86 	ENCODING_UTF32,		/* Needs byte order marker */
    87 	ENCODING_UTF32BE,
    88 	ENCODING_UTF32LE,
    89 	ENCODING_UCS2,		/* Native byte order assumed */
    90 	ENCODING_UCS4,		/* Native byte order assumed */
    91 };
    92 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
    93 #define ENCODING_UTF16NATIVE	ENCODING_UTF16BE
    94 #define ENCODING_UTF32NATIVE	ENCODING_UTF32BE
    95 #else
    96 #define ENCODING_UTF16NATIVE	ENCODING_UTF16LE
    97 #define ENCODING_UTF32NATIVE	ENCODING_UTF32LE
    98 #endif
    99 
   100 struct _SDL_iconv_t
   101 {
   102 	int src_fmt;
   103 	int dst_fmt;
   104 };
   105 
   106 static struct {
   107 	const char *name;
   108 	int format;
   109 } encodings[] = {
   110 	{ "ASCII",	ENCODING_ASCII },
   111 	{ "US-ASCII",	ENCODING_ASCII },
   112 	{ "8859-1",	ENCODING_LATIN1 },
   113 	{ "ISO-8859-1",	ENCODING_LATIN1 },
   114 	{ "UTF8",	ENCODING_UTF8 },
   115 	{ "UTF-8",	ENCODING_UTF8 },
   116 	{ "UTF16",	ENCODING_UTF16 },
   117 	{ "UTF-16",	ENCODING_UTF16 },
   118 	{ "UTF16BE",	ENCODING_UTF16BE },
   119 	{ "UTF-16BE",	ENCODING_UTF16BE },
   120 	{ "UTF16LE",	ENCODING_UTF16LE },
   121 	{ "UTF-16LE",	ENCODING_UTF16LE },
   122 	{ "UTF32",	ENCODING_UTF32 },
   123 	{ "UTF-32",	ENCODING_UTF32 },
   124 	{ "UTF32BE",	ENCODING_UTF32BE },
   125 	{ "UTF-32BE",	ENCODING_UTF32BE },
   126 	{ "UTF32LE",	ENCODING_UTF32LE },
   127 	{ "UTF-32LE",	ENCODING_UTF32LE },
   128 	{ "UCS2",	ENCODING_UCS2 },
   129 	{ "UCS-2",	ENCODING_UCS2 },
   130 	{ "UCS4",	ENCODING_UCS4 },
   131 	{ "UCS-4",	ENCODING_UCS4 },
   132 };
   133 
   134 static const char *getlocale()
   135 {
   136 	const char *lang;
   137 
   138 	lang = SDL_getenv("LC_ALL");
   139 	if ( !lang ) {
   140 		lang = SDL_getenv("LC_CTYPE");
   141 	}
   142 	if ( !lang ) {
   143 		lang = SDL_getenv("LC_MESSAGES");
   144 	}
   145 	if ( !lang ) {
   146 		lang = SDL_getenv("LANG");
   147 	}
   148 	if ( !lang || !*lang || SDL_strcmp(lang, "C") == 0 ) {
   149 		lang = "ASCII";
   150 	}
   151 	return lang;
   152 }
   153 
   154 SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
   155 {
   156 	int src_fmt = ENCODING_UNKNOWN;
   157 	int dst_fmt = ENCODING_UNKNOWN;
   158 	int i;
   159 
   160 	if ( !fromcode || !*fromcode ) {
   161 		fromcode = getlocale();
   162 	}
   163 	if ( !tocode || !*tocode ) {
   164 		fromcode = getlocale();
   165 	}
   166 	for ( i = 0; i < SDL_arraysize(encodings); ++i ) {
   167 		if ( SDL_strcasecmp(fromcode, encodings[i].name) == 0 ) {
   168 			src_fmt = encodings[i].format;
   169 			if ( dst_fmt != ENCODING_UNKNOWN ) {
   170 				break;
   171 			}
   172 		}
   173 		if ( SDL_strcasecmp(tocode, encodings[i].name) == 0 ) {
   174 			dst_fmt = encodings[i].format;
   175 			if ( src_fmt != ENCODING_UNKNOWN ) {
   176 				break;
   177 			}
   178 		}
   179 	}
   180 	if ( src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN ) {
   181 		SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd));
   182 		if ( cd ) {
   183 			cd->src_fmt = src_fmt;
   184 			cd->dst_fmt = dst_fmt;
   185 			return cd;
   186 		}
   187 	}
   188 	return (SDL_iconv_t)-1;
   189 }
   190 
   191 size_t SDL_iconv(SDL_iconv_t cd,
   192                  const char **inbuf, size_t *inbytesleft,
   193                  char **outbuf, size_t *outbytesleft)
   194 {
   195 	/* For simplicity, we'll convert everything to and from UCS-4 */
   196 	const char *src;
   197 	char *dst;
   198 	size_t srclen, dstlen;
   199 	Uint32 ch = 0;
   200 	size_t total;
   201 
   202 	if ( !inbuf || !*inbuf ) {
   203 		/* Reset the context */
   204 		return 0;
   205 	}
   206 	if ( !outbuf || !*outbuf || !outbytesleft || !*outbytesleft ) {
   207 		return SDL_ICONV_E2BIG;
   208 	}
   209 	src = *inbuf;
   210 	srclen = (inbytesleft ? *inbytesleft : 0);
   211 	dst = *outbuf;
   212 	dstlen = *outbytesleft;
   213 
   214 	switch ( cd->src_fmt ) {
   215 	    case ENCODING_UTF16:
   216 		/* Scan for a byte order marker */
   217 		{
   218 			Uint8 *p = (Uint8 *)src;
   219 			size_t n = srclen / 2;
   220 			while ( n ) {
   221 				if ( p[0] == 0xFF && p[1] == 0xFE ) {
   222 					cd->src_fmt = ENCODING_UTF16BE;
   223 					break;
   224 				} else if ( p[0] == 0xFE && p[1] == 0xFF ) {
   225 					cd->src_fmt = ENCODING_UTF16LE;
   226 					break;
   227 				}
   228 				p += 2;
   229 				--n;
   230 			}
   231 			if ( n == 0 ) {
   232 				/* We can't tell, default to host order */
   233 				cd->src_fmt = ENCODING_UTF16NATIVE;
   234 			}
   235 		}
   236 		break;
   237 	    case ENCODING_UTF32:
   238 		/* Scan for a byte order marker */
   239 		{
   240 			Uint8 *p = (Uint8 *)src;
   241 			size_t n = srclen / 4;
   242 			while ( n ) {
   243 				if ( p[0] == 0xFF && p[1] == 0xFE &&
   244 				     p[2] == 0x00 && p[3] == 0x00 ) {
   245 					cd->src_fmt = ENCODING_UTF32BE;
   246 					break;
   247 				} else if ( p[0] == 0x00 && p[1] == 0x00 &&
   248 				            p[2] == 0xFE && p[3] == 0xFF ) {
   249 					cd->src_fmt = ENCODING_UTF32LE;
   250 					break;
   251 				}
   252 				p += 4;
   253 				--n;
   254 			}
   255 			if ( n == 0 ) {
   256 				/* We can't tell, default to host order */
   257 				cd->src_fmt = ENCODING_UTF32NATIVE;
   258 			}
   259 		}
   260 		break;
   261 	}
   262 
   263 	switch ( cd->dst_fmt ) {
   264 	    case ENCODING_UTF16:
   265 		/* Default to host order, need to add byte order marker */
   266 		if ( dstlen < 2 ) {
   267 			return SDL_ICONV_E2BIG;
   268 		}
   269 		*(Uint16 *)dst = UNICODE_BOM;
   270 		dst += 2;
   271 		dstlen -= 2;
   272 		cd->dst_fmt = ENCODING_UTF16NATIVE;
   273 		break;
   274 	    case ENCODING_UTF32:
   275 		/* Default to host order, need to add byte order marker */
   276 		if ( dstlen < 4 ) {
   277 			return SDL_ICONV_E2BIG;
   278 		}
   279 		*(Uint32 *)dst = UNICODE_BOM;
   280 		dst += 4;
   281 		dstlen -= 4;
   282 		cd->dst_fmt = ENCODING_UTF32NATIVE;
   283 		break;
   284 	}
   285 
   286 	total = 0;
   287 	while ( srclen > 0 ) {
   288 		/* Decode a character */
   289 		switch ( cd->src_fmt ) {
   290 		    case ENCODING_ASCII:
   291 			{
   292 				Uint8 *p = (Uint8 *)src;
   293 				ch = (Uint32)(p[0] & 0x7F);
   294 				++src;
   295 				--srclen;
   296 			}
   297 			break;
   298 		    case ENCODING_LATIN1:
   299 			{
   300 				Uint8 *p = (Uint8 *)src;
   301 				ch = (Uint32)p[0];
   302 				++src;
   303 				--srclen;
   304 			}
   305 			break;
   306 		    case ENCODING_UTF8: /* RFC 3629 */
   307 			{
   308 				Uint8 *p = (Uint8 *)src;
   309 				size_t left = 0;
   310 				SDL_bool overlong = SDL_FALSE;
   311 				if ( p[0] >= 0xFC ) {
   312 					if ( (p[0] & 0xFE) != 0xFC ) {
   313 						/* Skip illegal sequences
   314 						return SDL_ICONV_EILSEQ;
   315 						*/
   316 						ch = UNKNOWN_UNICODE;
   317 					} else {
   318 						if ( p[0] == 0xFC ) {
   319 							overlong = SDL_TRUE;
   320 						}
   321 						ch = (Uint32)(p[0] & 0x01);
   322 						left = 5;
   323 					}
   324 				} else if ( p[0] >= 0xF8 ) {
   325 					if ( (p[0] & 0xFC) != 0xF8 ) {
   326 						/* Skip illegal sequences
   327 						return SDL_ICONV_EILSEQ;
   328 						*/
   329 						ch = UNKNOWN_UNICODE;
   330 					} else {
   331 						if ( p[0] == 0xF8 ) {
   332 							overlong = SDL_TRUE;
   333 						}
   334 						ch = (Uint32)(p[0] & 0x03);
   335 						left = 4;
   336 					}
   337 				} else if ( p[0] >= 0xF0 ) {
   338 					if ( (p[0] & 0xF8) != 0xF0 ) {
   339 						/* Skip illegal sequences
   340 						return SDL_ICONV_EILSEQ;
   341 						*/
   342 						ch = UNKNOWN_UNICODE;
   343 					} else {
   344 						if ( p[0] == 0xF0 ) {
   345 							overlong = SDL_TRUE;
   346 						}
   347 						ch = (Uint32)(p[0] & 0x07);
   348 						left = 3;
   349 					}
   350 				} else if ( p[0] >= 0xE0 ) {
   351 					if ( (p[0] & 0xF0) != 0xE0 ) {
   352 						/* Skip illegal sequences
   353 						return SDL_ICONV_EILSEQ;
   354 						*/
   355 						ch = UNKNOWN_UNICODE;
   356 					} else {
   357 						if ( p[0] == 0xE0 ) {
   358 							overlong = SDL_TRUE;
   359 						}
   360 						ch = (Uint32)(p[0] & 0x0F);
   361 						left = 2;
   362 					}
   363 				} else if ( p[0] >= 0xC0 ) {
   364 					if ( (p[0] & 0xE0) != 0xC0 ) {
   365 						/* Skip illegal sequences
   366 						return SDL_ICONV_EILSEQ;
   367 						*/
   368 						ch = UNKNOWN_UNICODE;
   369 					} else {
   370 						if ( (p[0] & 0xCE) == 0xC0 ) {
   371 							overlong = SDL_TRUE;
   372 						}
   373 						ch = (Uint32)(p[0] & 0x1F);
   374 						left = 1;
   375 					}
   376 				} else {
   377 					if ( (p[0] & 0x80) != 0x00 ) {
   378 						/* Skip illegal sequences
   379 						return SDL_ICONV_EILSEQ;
   380 						*/
   381 						ch = UNKNOWN_UNICODE;
   382 					} else {
   383 						ch = (Uint32)p[0];
   384 					}
   385 				}
   386 				++src;
   387 				--srclen;
   388 				if ( srclen < left ) {
   389 					return SDL_ICONV_EINVAL;
   390 				}
   391 				while ( left-- ) {
   392 					++p;
   393 					if ( (p[0] & 0xC0) != 0x80 ) {
   394 						/* Skip illegal sequences
   395 						return SDL_ICONV_EILSEQ;
   396 						*/
   397 						ch = UNKNOWN_UNICODE;
   398 						break;
   399 					}
   400 					ch <<= 6;
   401 					ch |= (p[0] & 0x3F);
   402 					++src;
   403 					--srclen;
   404 				}
   405 				if ( overlong ) {
   406 					/* Potential security risk
   407 					return SDL_ICONV_EILSEQ;
   408 					*/
   409 					ch = UNKNOWN_UNICODE;
   410 				}
   411 				if ( (ch >= 0xD800 && ch <= 0xDFFF) ||
   412 				     (ch == 0xFFFE || ch == 0xFFFF) ||
   413 				     ch > 0x10FFFF ) {
   414 					/* Skip illegal sequences
   415 					return SDL_ICONV_EILSEQ;
   416 					*/
   417 					ch = UNKNOWN_UNICODE;
   418 				}
   419 			}
   420 			break;
   421 		    case ENCODING_UTF16BE: /* RFC 2781 */
   422 			{
   423 				Uint8 *p = (Uint8 *)src;
   424 				Uint16 W1, W2;
   425 				if ( srclen < 2 ) {
   426 					return SDL_ICONV_EINVAL;
   427 				}
   428 				W1 = ((Uint16)p[0] << 8) |
   429 				      (Uint16)p[1];
   430 				src += 2;
   431 				srclen -= 2;
   432 				if ( W1 < 0xD800 || W1 > 0xDFFF ) {
   433 					ch = (Uint32)W1;
   434 					break;
   435 				}
   436 				if ( W1 > 0xDBFF ) {
   437 					/* Skip illegal sequences
   438 					return SDL_ICONV_EILSEQ;
   439 					*/
   440 					ch = UNKNOWN_UNICODE;
   441 					break;
   442 				}
   443 				if ( srclen < 2 ) {
   444 					return SDL_ICONV_EINVAL;
   445 				}
   446 				p = (Uint8 *)src;
   447 				W2 = ((Uint16)p[0] << 8) |
   448 				      (Uint16)p[1];
   449 				src += 2;
   450 				srclen -= 2;
   451 				if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
   452 					/* Skip illegal sequences
   453 					return SDL_ICONV_EILSEQ;
   454 					*/
   455 					ch = UNKNOWN_UNICODE;
   456 					break;
   457 				}
   458 				ch = (((Uint32)(W1 & 0x3FF) << 10) |
   459 				      (Uint32)(W2 & 0x3FF)) + 0x10000;
   460 			}
   461 			break;
   462 		    case ENCODING_UTF16LE: /* RFC 2781 */
   463 			{
   464 				Uint8 *p = (Uint8 *)src;
   465 				Uint16 W1, W2;
   466 				if ( srclen < 2 ) {
   467 					return SDL_ICONV_EINVAL;
   468 				}
   469 				W1 = ((Uint16)p[1] << 8) |
   470 				      (Uint16)p[0];
   471 				src += 2;
   472 				srclen -= 2;
   473 				if ( W1 < 0xD800 || W1 > 0xDFFF ) {
   474 					ch = (Uint32)W1;
   475 					break;
   476 				}
   477 				if ( W1 > 0xDBFF ) {
   478 					/* Skip illegal sequences
   479 					return SDL_ICONV_EILSEQ;
   480 					*/
   481 					ch = UNKNOWN_UNICODE;
   482 					break;
   483 				}
   484 				if ( srclen < 2 ) {
   485 					return SDL_ICONV_EINVAL;
   486 				}
   487 				p = (Uint8 *)src;
   488 				W2 = ((Uint16)p[1] << 8) |
   489 				      (Uint16)p[0];
   490 				src += 2;
   491 				srclen -= 2;
   492 				if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
   493 					/* Skip illegal sequences
   494 					return SDL_ICONV_EILSEQ;
   495 					*/
   496 					ch = UNKNOWN_UNICODE;
   497 					break;
   498 				}
   499 				ch = (((Uint32)(W1 & 0x3FF) << 10) |
   500 				      (Uint32)(W2 & 0x3FF)) + 0x10000;
   501 			}
   502 			break;
   503 		    case ENCODING_UTF32BE:
   504 			{
   505 				Uint8 *p = (Uint8 *)src;
   506 				if ( srclen < 4 ) {
   507 					return SDL_ICONV_EINVAL;
   508 				}
   509 				ch = ((Uint32)p[0] << 24) |
   510 				     ((Uint32)p[1] << 16) |
   511 				     ((Uint32)p[2] << 8) |
   512 				      (Uint32)p[3];
   513 				src += 4;
   514 				srclen -= 4;
   515 			}
   516 			break;
   517 		    case ENCODING_UTF32LE:
   518 			{
   519 				Uint8 *p = (Uint8 *)src;
   520 				if ( srclen < 4 ) {
   521 					return SDL_ICONV_EINVAL;
   522 				}
   523 				ch = ((Uint32)p[3] << 24) |
   524 				     ((Uint32)p[2] << 16) |
   525 				     ((Uint32)p[1] << 8) |
   526 				      (Uint32)p[0];
   527 				src += 4;
   528 				srclen -= 4;
   529 			}
   530 			break;
   531 		    case ENCODING_UCS2:
   532 			{
   533 				Uint16 *p = (Uint16 *)src;
   534 				if ( srclen < 2 ) {
   535 					return SDL_ICONV_EINVAL;
   536 				}
   537 				ch = *p;
   538 				src += 2;
   539 				srclen -= 2;
   540 			}
   541 			break;
   542 		    case ENCODING_UCS4:
   543 			{
   544 				Uint32 *p = (Uint32 *)src;
   545 				if ( srclen < 4 ) {
   546 					return SDL_ICONV_EINVAL;
   547 				}
   548 				ch = *p;
   549 				src += 4;
   550 				srclen -= 4;
   551 			}
   552 			break;
   553 		}
   554 
   555 		/* Encode a character */
   556 		switch ( cd->dst_fmt ) {
   557 		    case ENCODING_ASCII:
   558 			{
   559 				Uint8 *p = (Uint8 *)dst;
   560 				if ( dstlen < 1 ) {
   561 					return SDL_ICONV_E2BIG;
   562 				}
   563 				if ( ch > 0x7F ) {
   564 					*p = UNKNOWN_ASCII;
   565 				} else {
   566 					*p = (Uint8)ch;
   567 				}
   568 				++dst;
   569 				--dstlen;
   570 			}
   571 			break;
   572 		    case ENCODING_LATIN1:
   573 			{
   574 				Uint8 *p = (Uint8 *)dst;
   575 				if ( dstlen < 1 ) {
   576 					return SDL_ICONV_E2BIG;
   577 				}
   578 				if ( ch > 0xFF ) {
   579 					*p = UNKNOWN_ASCII;
   580 				} else {
   581 					*p = (Uint8)ch;
   582 				}
   583 				++dst;
   584 				--dstlen;
   585 			}
   586 			break;
   587 		    case ENCODING_UTF8: /* RFC 3629 */
   588 			{
   589 				Uint8 *p = (Uint8 *)dst;
   590 				if ( ch > 0x10FFFF ) {
   591 					ch = UNKNOWN_UNICODE;
   592 				}
   593 				if ( ch <= 0x7F ) {
   594 					if ( dstlen < 1 ) {
   595 						return SDL_ICONV_E2BIG;
   596 					}
   597 					*p = (Uint8)ch;
   598 					++dst;
   599 					--dstlen;
   600 				} else if ( ch <= 0x7FF ) {
   601 					if ( dstlen < 2 ) {
   602 						return SDL_ICONV_E2BIG;
   603 					}
   604 					p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F);
   605 					p[1] = 0x80 | (Uint8)(ch & 0x3F);
   606 					dst += 2;
   607 					dstlen -= 2;
   608 				} else if ( ch <= 0xFFFF ) {
   609 					if ( dstlen < 3 ) {
   610 						return SDL_ICONV_E2BIG;
   611 					}
   612 					p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F);
   613 					p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
   614 					p[2] = 0x80 | (Uint8)(ch & 0x3F);
   615 					dst += 3;
   616 					dstlen -= 3;
   617 				} else if ( ch <= 0x1FFFFF ) {
   618 					if ( dstlen < 4 ) {
   619 						return SDL_ICONV_E2BIG;
   620 					}
   621 					p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07);
   622 					p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
   623 					p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
   624 					p[3] = 0x80 | (Uint8)(ch & 0x3F);
   625 					dst += 4;
   626 					dstlen -= 4;
   627 				} else if ( ch <= 0x3FFFFFF ) {
   628 					if ( dstlen < 5 ) {
   629 						return SDL_ICONV_E2BIG;
   630 					}
   631 					p[0] = 0xF8 | (Uint8)((ch >> 24) & 0x03);
   632 					p[1] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
   633 					p[2] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
   634 					p[3] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
   635 					p[4] = 0x80 | (Uint8)(ch & 0x3F);
   636 					dst += 5;
   637 					dstlen -= 5;
   638 				} else {
   639 					if ( dstlen < 6 ) {
   640 						return SDL_ICONV_E2BIG;
   641 					}
   642 					p[0] = 0xFC | (Uint8)((ch >> 30) & 0x01);
   643 					p[1] = 0x80 | (Uint8)((ch >> 24) & 0x3F);
   644 					p[2] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
   645 					p[3] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
   646 					p[4] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
   647 					p[5] = 0x80 | (Uint8)(ch & 0x3F);
   648 					dst += 6;
   649 					dstlen -= 6;
   650 				}
   651 			}
   652 			break;
   653 		    case ENCODING_UTF16BE: /* RFC 2781 */
   654 			{
   655 				Uint8 *p = (Uint8 *)dst;
   656 				if ( ch > 0x10FFFF ) {
   657 					ch = UNKNOWN_UNICODE;
   658 				}
   659 				if ( ch < 0x10000 ) {
   660 					if ( dstlen < 2 ) {
   661 						return SDL_ICONV_E2BIG;
   662 					}
   663 					p[0] = (Uint8)(ch >> 8);
   664 					p[1] = (Uint8)ch;
   665 					dst += 2;
   666 					dstlen -= 2;
   667 				} else {
   668 					Uint16 W1, W2;
   669 					if ( dstlen < 4 ) {
   670 						return SDL_ICONV_E2BIG;
   671 					}
   672 					ch = ch - 0x10000;
   673 					W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
   674 					W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
   675 					p[0] = (Uint8)(W1 >> 8);
   676 					p[1] = (Uint8)W1;
   677 					p[2] = (Uint8)(W2 >> 8);
   678 					p[3] = (Uint8)W2;
   679 					dst += 4;
   680 					dstlen -= 4;
   681 				}
   682 			}
   683 			break;
   684 		    case ENCODING_UTF16LE: /* RFC 2781 */
   685 			{
   686 				Uint8 *p = (Uint8 *)dst;
   687 				if ( ch > 0x10FFFF ) {
   688 					ch = UNKNOWN_UNICODE;
   689 				}
   690 				if ( ch < 0x10000 ) {
   691 					if ( dstlen < 2 ) {
   692 						return SDL_ICONV_E2BIG;
   693 					}
   694 					p[1] = (Uint8)(ch >> 8);
   695 					p[0] = (Uint8)ch;
   696 					dst += 2;
   697 					dstlen -= 2;
   698 				} else {
   699 					Uint16 W1, W2;
   700 					if ( dstlen < 4 ) {
   701 						return SDL_ICONV_E2BIG;
   702 					}
   703 					ch = ch - 0x10000;
   704 					W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
   705 					W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
   706 					p[1] = (Uint8)(W1 >> 8);
   707 					p[0] = (Uint8)W1;
   708 					p[3] = (Uint8)(W2 >> 8);
   709 					p[2] = (Uint8)W2;
   710 					dst += 4;
   711 					dstlen -= 4;
   712 				}
   713 			}
   714 			break;
   715 		    case ENCODING_UTF32BE:
   716 			{
   717 				Uint8 *p = (Uint8 *)dst;
   718 				if ( ch > 0x10FFFF ) {
   719 					ch = UNKNOWN_UNICODE;
   720 				}
   721 				if ( dstlen < 4 ) {
   722 					return SDL_ICONV_E2BIG;
   723 				}
   724 				p[0] = (Uint8)(ch >> 24);
   725 				p[1] = (Uint8)(ch >> 16);
   726 				p[2] = (Uint8)(ch >> 8);
   727 				p[3] = (Uint8)ch;
   728 				dst += 4;
   729 				dstlen -= 4;
   730 			}
   731 			break;
   732 		    case ENCODING_UTF32LE:
   733 			{
   734 				Uint8 *p = (Uint8 *)dst;
   735 				if ( ch > 0x10FFFF ) {
   736 					ch = UNKNOWN_UNICODE;
   737 				}
   738 				if ( dstlen < 4 ) {
   739 					return SDL_ICONV_E2BIG;
   740 				}
   741 				p[3] = (Uint8)(ch >> 24);
   742 				p[2] = (Uint8)(ch >> 16);
   743 				p[1] = (Uint8)(ch >> 8);
   744 				p[0] = (Uint8)ch;
   745 				dst += 4;
   746 				dstlen -= 4;
   747 			}
   748 			break;
   749 		    case ENCODING_UCS2:
   750 			{
   751 				Uint16 *p = (Uint16 *)dst;
   752 				if ( ch > 0xFFFF ) {
   753 					ch = UNKNOWN_UNICODE;
   754 				}
   755 				if ( dstlen < 2 ) {
   756 					return SDL_ICONV_E2BIG;
   757 				}
   758 				*p = (Uint16)ch;
   759 				dst += 2;
   760 				dstlen -= 2;
   761 			}
   762 			break;
   763 		    case ENCODING_UCS4:
   764 			{
   765 				Uint32 *p = (Uint32 *)dst;
   766 				if ( ch > 0x7FFFFFFF ) {
   767 					ch = UNKNOWN_UNICODE;
   768 				}
   769 				if ( dstlen < 4 ) {
   770 					return SDL_ICONV_E2BIG;
   771 				}
   772 				*p = ch;
   773 				dst += 4;
   774 				dstlen -= 4;
   775 			}
   776 			break;
   777 		}
   778 
   779 		/* Update state */
   780 		*inbuf = src;
   781 		*inbytesleft = srclen;
   782 		*outbuf = dst;
   783 		*outbytesleft = dstlen;
   784 		++total;
   785 	}
   786 	return total;
   787 }
   788 
   789 int SDL_iconv_close(SDL_iconv_t cd)
   790 {
   791 	if ( cd && cd != (SDL_iconv_t)-1 ) {
   792 		SDL_free(cd);
   793 	}
   794 	return 0;
   795 }
   796 
   797 #endif /* !HAVE_ICONV */
   798 
   799 char *SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft)
   800 {
   801 	SDL_iconv_t cd;
   802 	char *string;
   803 	size_t stringsize;
   804 	char *outbuf;
   805 	size_t outbytesleft;
   806 	size_t retCode = 0;
   807 
   808 	cd = SDL_iconv_open(tocode, fromcode);
   809 	if ( cd == (SDL_iconv_t)-1 ) {
   810 		/* See if we can recover here (fixes iconv on Solaris 11) */
   811 		if ( !tocode || !*tocode ) {
   812 			tocode = "UTF-8";
   813 		}
   814 		if ( !fromcode || !*fromcode ) {
   815 			tocode = "UTF-8";
   816 		}
   817 		cd = SDL_iconv_open(tocode, fromcode);
   818 	}
   819 	if ( cd == (SDL_iconv_t)-1 ) {
   820 		return NULL;
   821 	}
   822 
   823 	stringsize = inbytesleft > 4 ? inbytesleft : 4;
   824 	string = SDL_malloc(stringsize);
   825 	if ( !string ) {
   826 		SDL_iconv_close(cd);
   827 		return NULL;
   828 	}
   829 	outbuf = string;
   830 	outbytesleft = stringsize;
   831 	SDL_memset(outbuf, 0, 4);
   832 
   833 	while ( inbytesleft > 0 ) {
   834 		retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
   835 		switch (retCode) {
   836 		    case SDL_ICONV_E2BIG:
   837 			{
   838 				char *oldstring = string;
   839 				stringsize *= 2;
   840 				string = SDL_realloc(string, stringsize);
   841 				if ( !string ) {
   842 					SDL_iconv_close(cd);
   843 					return NULL;
   844 				}
   845 				outbuf = string + (outbuf - oldstring);
   846 				outbytesleft = stringsize - (outbuf - string);
   847 				SDL_memset(outbuf, 0, 4);
   848 			}
   849 			break;
   850 		    case SDL_ICONV_EILSEQ:
   851 			/* Try skipping some input data - not perfect, but... */
   852 			++inbuf;
   853 			--inbytesleft;
   854 			break;
   855 		    case SDL_ICONV_EINVAL:
   856 		    case SDL_ICONV_ERROR:
   857 			/* We can't continue... */
   858 			inbytesleft = 0;
   859 			break;
   860 		}
   861 	}
   862 	SDL_iconv_close(cd);
   863 
   864 	return string;
   865 }