src/stdlib/SDL_iconv.c
author Sam Lantinga <slouken@libsdl.org>
Mon, 13 Mar 2006 17:25:44 +0000
changeset 1519 c99e7a9c9bc9
parent 1513 13a3520a13f9
child 1662 782fd950bd46
child 1849 b5a4ac87b98c
permissions -rw-r--r--
RFC 3629 restricted the range of characters encoded with UTF-8 to 0000-10FFFF (the UTF-16 accessible range)
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 /* This file contains portable iconv functions for SDL */
    25 
    26 #include "SDL_stdinc.h"
    27 #include "SDL_endian.h"
    28 
    29 #ifdef HAVE_ICONV
    30 
    31 #include <errno.h>
    32 
    33 size_t SDL_iconv(SDL_iconv_t cd,
    34                  char **inbuf, size_t *inbytesleft,
    35                  char **outbuf, size_t *outbytesleft)
    36 {
    37 	size_t retCode = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
    38 	if ( retCode == (size_t)-1 ) {
    39 		switch(errno) {
    40 		    case E2BIG:
    41 			return SDL_ICONV_E2BIG;
    42 		    case EILSEQ:
    43 			return SDL_ICONV_EILSEQ;
    44 		    case EINVAL:
    45 			return SDL_ICONV_EINVAL;
    46 		    default:
    47 			return SDL_ICONV_ERROR;
    48 		}
    49 	}
    50 	return retCode;
    51 }
    52 
    53 #else
    54 
    55 /* Lots of useful information on Unicode at:
    56 	http://www.cl.cam.ac.uk/~mgk25/unicode.html
    57 */
    58 
    59 #define UNICODE_BOM	0xFEFF
    60 
    61 #define UNKNOWN_ASCII	'?'
    62 #define UNKNOWN_UNICODE	0xFFFD
    63 
    64 enum {
    65 	ENCODING_UNKNOWN,
    66 	ENCODING_ASCII,
    67 	ENCODING_LATIN1,
    68 	ENCODING_UTF8,
    69 	ENCODING_UTF16,		/* Needs byte order marker */
    70 	ENCODING_UTF16BE,
    71 	ENCODING_UTF16LE,
    72 	ENCODING_UTF32,		/* Needs byte order marker */
    73 	ENCODING_UTF32BE,
    74 	ENCODING_UTF32LE,
    75 	ENCODING_UCS2,		/* Native byte order assumed */
    76 	ENCODING_UCS4,		/* Native byte order assumed */
    77 };
    78 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
    79 #define ENCODING_UTF16NATIVE	ENCODING_UTF16BE
    80 #define ENCODING_UTF32NATIVE	ENCODING_UTF32BE
    81 #else
    82 #define ENCODING_UTF16NATIVE	ENCODING_UTF16LE
    83 #define ENCODING_UTF32NATIVE	ENCODING_UTF32LE
    84 #endif
    85 
    86 struct _SDL_iconv_t
    87 {
    88 	int src_fmt;
    89 	int dst_fmt;
    90 };
    91 
    92 static struct {
    93 	const char *name;
    94 	int format;
    95 } encodings[] = {
    96 	{ "ASCII",	ENCODING_ASCII },
    97 	{ "US-ASCII",	ENCODING_ASCII },
    98 	{ "LATIN1",	ENCODING_LATIN1 },
    99 	{ "ISO-8859-1",	ENCODING_LATIN1 },
   100 	{ "UTF8",	ENCODING_UTF8 },
   101 	{ "UTF-8",	ENCODING_UTF8 },
   102 	{ "UTF16",	ENCODING_UTF16 },
   103 	{ "UTF-16",	ENCODING_UTF16 },
   104 	{ "UTF16BE",	ENCODING_UTF16BE },
   105 	{ "UTF-16BE",	ENCODING_UTF16BE },
   106 	{ "UTF16LE",	ENCODING_UTF16LE },
   107 	{ "UTF-16LE",	ENCODING_UTF16LE },
   108 	{ "UTF32",	ENCODING_UTF32 },
   109 	{ "UTF-32",	ENCODING_UTF32 },
   110 	{ "UTF32BE",	ENCODING_UTF32BE },
   111 	{ "UTF-32BE",	ENCODING_UTF32BE },
   112 	{ "UTF32LE",	ENCODING_UTF32LE },
   113 	{ "UTF-32LE",	ENCODING_UTF32LE },
   114 	{ "UCS2",	ENCODING_UCS2 },
   115 	{ "UCS-2",	ENCODING_UCS2 },
   116 	{ "UCS4",	ENCODING_UCS4 },
   117 	{ "UCS-4",	ENCODING_UCS4 },
   118 };
   119 
   120 SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
   121 {
   122 	int src_fmt = ENCODING_UNKNOWN;
   123 	int dst_fmt = ENCODING_UNKNOWN;
   124 	int i;
   125 
   126 	for ( i = 0; i < SDL_arraysize(encodings); ++i ) {
   127 		if ( SDL_strcasecmp(fromcode, encodings[i].name) == 0 ) {
   128 			src_fmt = encodings[i].format;
   129 			if ( dst_fmt != ENCODING_UNKNOWN ) {
   130 				break;
   131 			}
   132 		}
   133 		if ( SDL_strcasecmp(tocode, encodings[i].name) == 0 ) {
   134 			dst_fmt = encodings[i].format;
   135 			if ( src_fmt != ENCODING_UNKNOWN ) {
   136 				break;
   137 			}
   138 		}
   139 	}
   140 	if ( src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN ) {
   141 		SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd));
   142 		if ( cd ) {
   143 			cd->src_fmt = src_fmt;
   144 			cd->dst_fmt = dst_fmt;
   145 			return cd;
   146 		}
   147 	}
   148 	return (SDL_iconv_t)-1;
   149 }
   150 
   151 size_t SDL_iconv(SDL_iconv_t cd,
   152                  char **inbuf, size_t *inbytesleft,
   153                  char **outbuf, size_t *outbytesleft)
   154 {
   155 	/* For simplicity, we'll convert everything to and from UCS-4 */
   156 	char *src, *dst;
   157 	size_t srclen, dstlen;
   158 	Uint32 ch;
   159 	size_t total;
   160 
   161 	if ( !inbuf || !*inbuf ) {
   162 		/* Reset the context */
   163 		return 0;
   164 	}
   165 	if ( !outbuf || !*outbuf || !outbytesleft || !*outbytesleft ) {
   166 		return SDL_ICONV_E2BIG;
   167 	}
   168 	src = *inbuf;
   169 	srclen = (inbytesleft ? *inbytesleft : 0);
   170 	dst = *outbuf;
   171 	dstlen = *outbytesleft;
   172 
   173 	switch ( cd->src_fmt ) {
   174 	    case ENCODING_UTF16:
   175 		/* Scan for a byte order marker */
   176 		{
   177 			Uint8 *p = (Uint8 *)src;
   178 			size_t n = srclen / 2;
   179 			while ( n ) {
   180 				if ( p[0] == 0xFF && p[1] == 0xFE ) {
   181 					cd->src_fmt = ENCODING_UTF16BE;
   182 					break;
   183 				} else if ( p[0] == 0xFE && p[1] == 0xFF ) {
   184 					cd->src_fmt = ENCODING_UTF16LE;
   185 					break;
   186 				}
   187 				p += 2;
   188 				--n;
   189 			}
   190 			if ( n == 0 ) {
   191 				/* We can't tell, default to host order */
   192 				cd->src_fmt = ENCODING_UTF16NATIVE;
   193 			}
   194 		}
   195 		break;
   196 	    case ENCODING_UTF32:
   197 		/* Scan for a byte order marker */
   198 		{
   199 			Uint8 *p = (Uint8 *)src;
   200 			size_t n = srclen / 4;
   201 			while ( n ) {
   202 				if ( p[0] == 0xFF && p[1] == 0xFE &&
   203 				     p[2] == 0x00 && p[3] == 0x00 ) {
   204 					cd->src_fmt = ENCODING_UTF32BE;
   205 					break;
   206 				} else if ( p[0] == 0x00 && p[1] == 0x00 &&
   207 				            p[2] == 0xFE && p[3] == 0xFF ) {
   208 					cd->src_fmt = ENCODING_UTF32LE;
   209 					break;
   210 				}
   211 				p += 4;
   212 				--n;
   213 			}
   214 			if ( n == 0 ) {
   215 				/* We can't tell, default to host order */
   216 				cd->src_fmt = ENCODING_UTF32NATIVE;
   217 			}
   218 		}
   219 		break;
   220 	}
   221 
   222 	switch ( cd->dst_fmt ) {
   223 	    case ENCODING_UTF16:
   224 		/* Default to host order, need to add byte order marker */
   225 		if ( dstlen < 2 ) {
   226 			return SDL_ICONV_E2BIG;
   227 		}
   228 		*(Uint16 *)dst = UNICODE_BOM;
   229 		dst += 2;
   230 		dstlen -= 2;
   231 		cd->dst_fmt = ENCODING_UTF16NATIVE;
   232 		break;
   233 	    case ENCODING_UTF32:
   234 		/* Default to host order, need to add byte order marker */
   235 		if ( dstlen < 4 ) {
   236 			return SDL_ICONV_E2BIG;
   237 		}
   238 		*(Uint32 *)dst = UNICODE_BOM;
   239 		dst += 4;
   240 		dstlen -= 4;
   241 		cd->dst_fmt = ENCODING_UTF32NATIVE;
   242 		break;
   243 	}
   244 
   245 	total = 0;
   246 	while ( srclen > 0 ) {
   247 		/* Decode a character */
   248 		switch ( cd->src_fmt ) {
   249 		    case ENCODING_ASCII:
   250 			{
   251 				Uint8 *p = (Uint8 *)src;
   252 				ch = (Uint32)(p[0] & 0x7F);
   253 				++src;
   254 				--srclen;
   255 			}
   256 			break;
   257 		    case ENCODING_LATIN1:
   258 			{
   259 				Uint8 *p = (Uint8 *)src;
   260 				ch = (Uint32)p[0];
   261 				++src;
   262 				--srclen;
   263 			}
   264 			break;
   265 		    case ENCODING_UTF8: /* RFC 3629 */
   266 			{
   267 				Uint8 *p = (Uint8 *)src;
   268 				size_t left = 0;
   269 				SDL_bool overlong = SDL_FALSE;
   270 				if ( p[0] >= 0xFC ) {
   271 					if ( (p[0] & 0xFE) != 0xFC ) {
   272 						/* Skip illegal sequences
   273 						return SDL_ICONV_EILSEQ;
   274 						*/
   275 						ch = UNKNOWN_UNICODE;
   276 					} else {
   277 						if ( p[0] == 0xFC ) {
   278 							overlong = SDL_TRUE;
   279 						}
   280 						ch = (Uint32)(p[0] & 0x01);
   281 						left = 5;
   282 					}
   283 				} else if ( p[0] >= 0xF8 ) {
   284 					if ( (p[0] & 0xFC) != 0xF8 ) {
   285 						/* Skip illegal sequences
   286 						return SDL_ICONV_EILSEQ;
   287 						*/
   288 						ch = UNKNOWN_UNICODE;
   289 					} else {
   290 						if ( p[0] == 0xF8 ) {
   291 							overlong = SDL_TRUE;
   292 						}
   293 						ch = (Uint32)(p[0] & 0x03);
   294 						left = 4;
   295 					}
   296 				} else if ( p[0] >= 0xF0 ) {
   297 					if ( (p[0] & 0xF8) != 0xF0 ) {
   298 						/* Skip illegal sequences
   299 						return SDL_ICONV_EILSEQ;
   300 						*/
   301 						ch = UNKNOWN_UNICODE;
   302 					} else {
   303 						if ( p[0] == 0xF0 ) {
   304 							overlong = SDL_TRUE;
   305 						}
   306 						ch = (Uint32)(p[0] & 0x07);
   307 						left = 3;
   308 					}
   309 				} else if ( p[0] >= 0xE0 ) {
   310 					if ( (p[0] & 0xF0) != 0xE0 ) {
   311 						/* Skip illegal sequences
   312 						return SDL_ICONV_EILSEQ;
   313 						*/
   314 						ch = UNKNOWN_UNICODE;
   315 					} else {
   316 						if ( p[0] == 0xE0 ) {
   317 							overlong = SDL_TRUE;
   318 						}
   319 						ch = (Uint32)(p[0] & 0x0F);
   320 						left = 2;
   321 					}
   322 				} else if ( p[0] >= 0xC0 ) {
   323 					if ( (p[0] & 0xE0) != 0xC0 ) {
   324 						/* Skip illegal sequences
   325 						return SDL_ICONV_EILSEQ;
   326 						*/
   327 						ch = UNKNOWN_UNICODE;
   328 					} else {
   329 						if ( (p[0] & 0xCE) == 0xC0 ) {
   330 							overlong = SDL_TRUE;
   331 						}
   332 						ch = (Uint32)(p[0] & 0x1F);
   333 						left = 1;
   334 					}
   335 				} else {
   336 					if ( (p[0] & 0x80) != 0x00 ) {
   337 						/* Skip illegal sequences
   338 						return SDL_ICONV_EILSEQ;
   339 						*/
   340 						ch = UNKNOWN_UNICODE;
   341 					} else {
   342 						ch = (Uint32)p[0];
   343 					}
   344 				}
   345 				++src;
   346 				--srclen;
   347 				if ( srclen < left ) {
   348 					return SDL_ICONV_EINVAL;
   349 				}
   350 				while ( left-- ) {
   351 					++p;
   352 					if ( (p[0] & 0xC0) != 0x80 ) {
   353 						/* Skip illegal sequences
   354 						return SDL_ICONV_EILSEQ;
   355 						*/
   356 						ch = UNKNOWN_UNICODE;
   357 						break;
   358 					}
   359 					ch <<= 6;
   360 					ch |= (p[0] & 0x3F);
   361 					++src;
   362 					--srclen;
   363 				}
   364 				if ( overlong ) {
   365 					/* Potential security risk
   366 					return SDL_ICONV_EILSEQ;
   367 					*/
   368 					ch = UNKNOWN_UNICODE;
   369 				}
   370 				if ( (ch >= 0xD800 && ch <= 0xDFFF) ||
   371 				     (ch == 0xFFFE || ch == 0xFFFF) ||
   372 				     ch > 0x10FFFF ) {
   373 					/* Skip illegal sequences
   374 					return SDL_ICONV_EILSEQ;
   375 					*/
   376 					ch = UNKNOWN_UNICODE;
   377 				}
   378 			}
   379 			break;
   380 		    case ENCODING_UTF16BE: /* RFC 2781 */
   381 			{
   382 				Uint8 *p = (Uint8 *)src;
   383 				Uint16 W1, W2;
   384 				if ( srclen < 2 ) {
   385 					return SDL_ICONV_EINVAL;
   386 				}
   387 				W1 = ((Uint16)p[0] << 8) |
   388 				      (Uint16)p[1];
   389 				src += 2;
   390 				srclen -= 2;
   391 				if ( W1 < 0xD800 || W1 > 0xDFFF ) {
   392 					ch = (Uint32)W1;
   393 					break;
   394 				}
   395 				if ( W1 > 0xDBFF ) {
   396 					/* Skip illegal sequences
   397 					return SDL_ICONV_EILSEQ;
   398 					*/
   399 					ch = UNKNOWN_UNICODE;
   400 					break;
   401 				}
   402 				if ( srclen < 2 ) {
   403 					return SDL_ICONV_EINVAL;
   404 				}
   405 				p = (Uint8 *)src;
   406 				W2 = ((Uint16)p[0] << 8) |
   407 				      (Uint16)p[1];
   408 				src += 2;
   409 				srclen -= 2;
   410 				if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
   411 					/* Skip illegal sequences
   412 					return SDL_ICONV_EILSEQ;
   413 					*/
   414 					ch = UNKNOWN_UNICODE;
   415 					break;
   416 				}
   417 				ch = (((Uint32)(W1 & 0x3FF) << 10) |
   418 				      (Uint32)(W2 & 0x3FF)) + 0x10000;
   419 			}
   420 			break;
   421 		    case ENCODING_UTF16LE: /* RFC 2781 */
   422 			{
   423 				Uint8 *p = (Uint8 *)src;
   424 				Uint16 W1, W2;
   425 				if ( srclen < 2 ) {
   426 					return SDL_ICONV_EINVAL;
   427 				}
   428 				W1 = ((Uint16)p[1] << 8) |
   429 				      (Uint16)p[0];
   430 				src += 2;
   431 				srclen -= 2;
   432 				if ( W1 < 0xD800 || W1 > 0xDFFF ) {
   433 					ch = (Uint32)W1;
   434 					break;
   435 				}
   436 				if ( W1 > 0xDBFF ) {
   437 					/* Skip illegal sequences
   438 					return SDL_ICONV_EILSEQ;
   439 					*/
   440 					ch = UNKNOWN_UNICODE;
   441 					break;
   442 				}
   443 				if ( srclen < 2 ) {
   444 					return SDL_ICONV_EINVAL;
   445 				}
   446 				p = (Uint8 *)src;
   447 				W2 = ((Uint16)p[1] << 8) |
   448 				      (Uint16)p[0];
   449 				src += 2;
   450 				srclen -= 2;
   451 				if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
   452 					/* Skip illegal sequences
   453 					return SDL_ICONV_EILSEQ;
   454 					*/
   455 					ch = UNKNOWN_UNICODE;
   456 					break;
   457 				}
   458 				ch = (((Uint32)(W1 & 0x3FF) << 10) |
   459 				      (Uint32)(W2 & 0x3FF)) + 0x10000;
   460 			}
   461 			break;
   462 		    case ENCODING_UTF32BE:
   463 			{
   464 				Uint8 *p = (Uint8 *)src;
   465 				if ( srclen < 4 ) {
   466 					return SDL_ICONV_EINVAL;
   467 				}
   468 				ch = ((Uint32)p[0] << 24) |
   469 				     ((Uint32)p[1] << 16) |
   470 				     ((Uint32)p[2] << 8) |
   471 				      (Uint32)p[3];
   472 				src += 4;
   473 				srclen -= 4;
   474 			}
   475 			break;
   476 		    case ENCODING_UTF32LE:
   477 			{
   478 				Uint8 *p = (Uint8 *)src;
   479 				if ( srclen < 4 ) {
   480 					return SDL_ICONV_EINVAL;
   481 				}
   482 				ch = ((Uint32)p[3] << 24) |
   483 				     ((Uint32)p[2] << 16) |
   484 				     ((Uint32)p[1] << 8) |
   485 				      (Uint32)p[0];
   486 				src += 4;
   487 				srclen -= 4;
   488 			}
   489 			break;
   490 		    case ENCODING_UCS2:
   491 			{
   492 				Uint16 *p = (Uint16 *)src;
   493 				if ( srclen < 2 ) {
   494 					return SDL_ICONV_EINVAL;
   495 				}
   496 				ch = *p;
   497 				src += 2;
   498 				srclen -= 2;
   499 			}
   500 			break;
   501 		    case ENCODING_UCS4:
   502 			{
   503 				Uint32 *p = (Uint32 *)src;
   504 				if ( srclen < 4 ) {
   505 					return SDL_ICONV_EINVAL;
   506 				}
   507 				ch = *p;
   508 				src += 4;
   509 				srclen -= 4;
   510 			}
   511 			break;
   512 		}
   513 
   514 		/* Encode a character */
   515 		switch ( cd->dst_fmt ) {
   516 		    case ENCODING_ASCII:
   517 			{
   518 				Uint8 *p = (Uint8 *)dst;
   519 				if ( dstlen < 1 ) {
   520 					return SDL_ICONV_E2BIG;
   521 				}
   522 				if ( ch > 0x7F ) {
   523 					*p = UNKNOWN_ASCII;
   524 				} else {
   525 					*p = (Uint8)ch;
   526 				}
   527 				++dst;
   528 				--dstlen;
   529 			}
   530 			break;
   531 		    case ENCODING_LATIN1:
   532 			{
   533 				Uint8 *p = (Uint8 *)dst;
   534 				if ( dstlen < 1 ) {
   535 					return SDL_ICONV_E2BIG;
   536 				}
   537 				if ( ch > 0xFF ) {
   538 					*p = UNKNOWN_ASCII;
   539 				} else {
   540 					*p = (Uint8)ch;
   541 				}
   542 				++dst;
   543 				--dstlen;
   544 			}
   545 			break;
   546 		    case ENCODING_UTF8: /* RFC 3629 */
   547 			{
   548 				Uint8 *p = (Uint8 *)dst;
   549 				if ( ch > 0x10FFFF ) {
   550 					ch = UNKNOWN_UNICODE;
   551 				}
   552 				if ( ch <= 0x7F ) {
   553 					if ( dstlen < 1 ) {
   554 						return SDL_ICONV_E2BIG;
   555 					}
   556 					*p = (Uint8)ch;
   557 					++dst;
   558 					--dstlen;
   559 				} else if ( ch <= 0x7FF ) {
   560 					if ( dstlen < 2 ) {
   561 						return SDL_ICONV_E2BIG;
   562 					}
   563 					p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F);
   564 					p[1] = 0x80 | (Uint8)(ch & 0x3F);
   565 					dst += 2;
   566 					dstlen -= 2;
   567 				} else if ( ch <= 0xFFFF ) {
   568 					if ( dstlen < 3 ) {
   569 						return SDL_ICONV_E2BIG;
   570 					}
   571 					p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F);
   572 					p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
   573 					p[2] = 0x80 | (Uint8)(ch & 0x3F);
   574 					dst += 3;
   575 					dstlen -= 3;
   576 				} else if ( ch <= 0x1FFFFF ) {
   577 					if ( dstlen < 4 ) {
   578 						return SDL_ICONV_E2BIG;
   579 					}
   580 					p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07);
   581 					p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
   582 					p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
   583 					p[3] = 0x80 | (Uint8)(ch & 0x3F);
   584 					dst += 4;
   585 					dstlen -= 4;
   586 				} else if ( ch <= 0x3FFFFFF ) {
   587 					if ( dstlen < 5 ) {
   588 						return SDL_ICONV_E2BIG;
   589 					}
   590 					p[0] = 0xF8 | (Uint8)((ch >> 24) & 0x03);
   591 					p[1] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
   592 					p[2] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
   593 					p[3] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
   594 					p[4] = 0x80 | (Uint8)(ch & 0x3F);
   595 					dst += 5;
   596 					dstlen -= 5;
   597 				} else {
   598 					if ( dstlen < 6 ) {
   599 						return SDL_ICONV_E2BIG;
   600 					}
   601 					p[0] = 0xFC | (Uint8)((ch >> 30) & 0x01);
   602 					p[1] = 0x80 | (Uint8)((ch >> 24) & 0x3F);
   603 					p[2] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
   604 					p[3] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
   605 					p[4] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
   606 					p[5] = 0x80 | (Uint8)(ch & 0x3F);
   607 					dst += 6;
   608 					dstlen -= 6;
   609 				}
   610 			}
   611 			break;
   612 		    case ENCODING_UTF16BE: /* RFC 2781 */
   613 			{
   614 				Uint8 *p = (Uint8 *)dst;
   615 				if ( ch > 0x10FFFF ) {
   616 					ch = UNKNOWN_UNICODE;
   617 				}
   618 				if ( ch < 0x10000 ) {
   619 					if ( dstlen < 2 ) {
   620 						return SDL_ICONV_E2BIG;
   621 					}
   622 					p[0] = (Uint8)(ch >> 8);
   623 					p[1] = (Uint8)ch;
   624 					dst += 2;
   625 					dstlen -= 2;
   626 				} else {
   627 					Uint16 W1, W2;
   628 					if ( dstlen < 4 ) {
   629 						return SDL_ICONV_E2BIG;
   630 					}
   631 					ch = ch - 0x10000;
   632 					W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
   633 					W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
   634 					p[0] = (Uint8)(W1 >> 8);
   635 					p[1] = (Uint8)W1;
   636 					p[2] = (Uint8)(W2 >> 8);
   637 					p[3] = (Uint8)W2;
   638 					dst += 4;
   639 					dstlen -= 4;
   640 				}
   641 			}
   642 			break;
   643 		    case ENCODING_UTF16LE: /* RFC 2781 */
   644 			{
   645 				Uint8 *p = (Uint8 *)dst;
   646 				if ( ch > 0x10FFFF ) {
   647 					ch = UNKNOWN_UNICODE;
   648 				}
   649 				if ( ch < 0x10000 ) {
   650 					if ( dstlen < 2 ) {
   651 						return SDL_ICONV_E2BIG;
   652 					}
   653 					p[1] = (Uint8)(ch >> 8);
   654 					p[0] = (Uint8)ch;
   655 					dst += 2;
   656 					dstlen -= 2;
   657 				} else {
   658 					Uint16 W1, W2;
   659 					if ( dstlen < 4 ) {
   660 						return SDL_ICONV_E2BIG;
   661 					}
   662 					ch = ch - 0x10000;
   663 					W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
   664 					W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
   665 					p[1] = (Uint8)(W1 >> 8);
   666 					p[0] = (Uint8)W1;
   667 					p[3] = (Uint8)(W2 >> 8);
   668 					p[2] = (Uint8)W2;
   669 					dst += 4;
   670 					dstlen -= 4;
   671 				}
   672 			}
   673 			break;
   674 		    case ENCODING_UTF32BE:
   675 			{
   676 				Uint8 *p = (Uint8 *)dst;
   677 				if ( ch > 0x10FFFF ) {
   678 					ch = UNKNOWN_UNICODE;
   679 				}
   680 				if ( dstlen < 4 ) {
   681 					return SDL_ICONV_E2BIG;
   682 				}
   683 				p[0] = (Uint8)(ch >> 24);
   684 				p[1] = (Uint8)(ch >> 16);
   685 				p[2] = (Uint8)(ch >> 8);
   686 				p[3] = (Uint8)ch;
   687 				dst += 4;
   688 				dstlen -= 4;
   689 			}
   690 			break;
   691 		    case ENCODING_UTF32LE:
   692 			{
   693 				Uint8 *p = (Uint8 *)dst;
   694 				if ( ch > 0x10FFFF ) {
   695 					ch = UNKNOWN_UNICODE;
   696 				}
   697 				if ( dstlen < 4 ) {
   698 					return SDL_ICONV_E2BIG;
   699 				}
   700 				p[3] = (Uint8)(ch >> 24);
   701 				p[2] = (Uint8)(ch >> 16);
   702 				p[1] = (Uint8)(ch >> 8);
   703 				p[0] = (Uint8)ch;
   704 				dst += 4;
   705 				dstlen -= 4;
   706 			}
   707 			break;
   708 		    case ENCODING_UCS2:
   709 			{
   710 				Uint16 *p = (Uint16 *)dst;
   711 				if ( ch > 0xFFFF ) {
   712 					ch = UNKNOWN_UNICODE;
   713 				}
   714 				if ( dstlen < 2 ) {
   715 					return SDL_ICONV_E2BIG;
   716 				}
   717 				*p = (Uint16)ch;
   718 				dst += 2;
   719 				dstlen -= 2;
   720 			}
   721 			break;
   722 		    case ENCODING_UCS4:
   723 			{
   724 				Uint32 *p = (Uint32 *)dst;
   725 				if ( ch > 0x7FFFFFFF ) {
   726 					ch = UNKNOWN_UNICODE;
   727 				}
   728 				if ( dstlen < 4 ) {
   729 					return SDL_ICONV_E2BIG;
   730 				}
   731 				*p = ch;
   732 				dst += 4;
   733 				dstlen -= 4;
   734 			}
   735 			break;
   736 		}
   737 
   738 		/* Update state */
   739 		*inbuf = src;
   740 		*inbytesleft = srclen;
   741 		*outbuf = dst;
   742 		*outbytesleft = dstlen;
   743 		++total;
   744 	}
   745 	return total;
   746 }
   747 
   748 int SDL_iconv_close(SDL_iconv_t cd)
   749 {
   750 	if ( cd && cd != (SDL_iconv_t)-1 ) {
   751 		SDL_free(cd);
   752 	}
   753 	return 0;
   754 }
   755 
   756 #endif /* !HAVE_ICONV */
   757 
   758 char *SDL_iconv_string(const char *tocode, const char *fromcode, char *inbuf, size_t inbytesleft)
   759 {
   760 	SDL_iconv_t cd;
   761 	char *string;
   762 	size_t stringsize;
   763 	char *outbuf;
   764 	size_t outbytesleft;
   765 	size_t retCode = 0;
   766 
   767 	cd = SDL_iconv_open(tocode, fromcode);
   768 	if ( cd == (SDL_iconv_t)-1 ) {
   769 		return NULL;
   770 	}
   771 
   772 	stringsize = inbytesleft > 4 ? inbytesleft : 4;
   773 	string = SDL_malloc(stringsize);
   774 	if ( !string ) {
   775 		SDL_iconv_close(cd);
   776 		return NULL;
   777 	}
   778 	outbuf = string;
   779 	outbytesleft = stringsize;
   780 	SDL_memset(outbuf, 0, 4);
   781 
   782 	while ( inbytesleft > 0 ) {
   783 		retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
   784 		switch (retCode) {
   785 		    case SDL_ICONV_E2BIG:
   786 			{
   787 				char *oldstring = string;
   788 				stringsize *= 2;
   789 				string = SDL_realloc(string, stringsize);
   790 				if ( !string ) {
   791 					SDL_iconv_close(cd);
   792 					return NULL;
   793 				}
   794 				outbuf = string + (outbuf - oldstring);
   795 				outbytesleft = stringsize - (outbuf - string);
   796 				SDL_memset(outbuf, 0, 4);
   797 			}
   798 			break;
   799 		    case SDL_ICONV_EILSEQ:
   800 			/* Try skipping some input data - not perfect, but... */
   801 			++inbuf;
   802 			--inbytesleft;
   803 			break;
   804 		    case SDL_ICONV_EINVAL:
   805 		    case SDL_ICONV_ERROR:
   806 			/* We can't continue... */
   807 			inbytesleft = 0;
   808 			break;
   809 		}
   810 	}
   811 	SDL_iconv_close(cd);
   812 
   813 	return string;
   814 }