src/stdlib/SDL_iconv.c
author Sam Lantinga
Thu, 12 Jul 2007 07:47:29 +0000
branch SDL-1.2
changeset 4054 64bf737873a2
parent 4053 0aadbc81c497
child 4055 6ed1fded55ff
permissions -rw-r--r--
Fixed iconv handling on Solaris 11
slouken@1501
     1
/*
slouken@1501
     2
    SDL - Simple DirectMedia Layer
slouken@1501
     3
    Copyright (C) 1997-2006 Sam Lantinga
slouken@1501
     4
slouken@1501
     5
    This library is free software; you can redistribute it and/or
slouken@1501
     6
    modify it under the terms of the GNU Lesser General Public
slouken@1501
     7
    License as published by the Free Software Foundation; either
slouken@1501
     8
    version 2.1 of the License, or (at your option) any later version.
slouken@1501
     9
slouken@1501
    10
    This library is distributed in the hope that it will be useful,
slouken@1501
    11
    but WITHOUT ANY WARRANTY; without even the implied warranty of
slouken@1501
    12
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
slouken@1501
    13
    Lesser General Public License for more details.
slouken@1501
    14
slouken@1501
    15
    You should have received a copy of the GNU Lesser General Public
slouken@1501
    16
    License along with this library; if not, write to the Free Software
slouken@1501
    17
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
slouken@1501
    18
slouken@1501
    19
    Sam Lantinga
slouken@1501
    20
    slouken@libsdl.org
slouken@1501
    21
*/
slouken@1501
    22
#include "SDL_config.h"
slouken@1501
    23
slouken@1501
    24
/* This file contains portable iconv functions for SDL */
slouken@1501
    25
slouken@1501
    26
#include "SDL_stdinc.h"
slouken@1501
    27
#include "SDL_endian.h"
slouken@1501
    28
slouken@1501
    29
#ifdef HAVE_ICONV
slouken@1501
    30
slouken@3987
    31
/* Depending on which standard the iconv() was implemented with,
slouken@3987
    32
   iconv() may or may not use const char ** for the inbuf param.
slouken@3987
    33
   If we get this wrong, it's just a warning, so no big deal.
slouken@3987
    34
*/
slouken@3987
    35
#if defined(_XGP6) || \
slouken@3987
    36
    defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2))
slouken@3987
    37
#define ICONV_INBUF_NONCONST
slouken@3987
    38
#endif
slouken@3987
    39
slouken@1501
    40
#include <errno.h>
slouken@1501
    41
slouken@1501
    42
/* Forward the conversion to the system iconv() and translate a failure
 * (return value of (size_t)-1) from errno into the matching SDL_ICONV_*
 * code.  Any other return value is passed through unchanged.
 */
size_t SDL_iconv(SDL_iconv_t cd,
                 const char **inbuf, size_t *inbytesleft,
                 char **outbuf, size_t *outbytesleft)
{
#ifdef ICONV_INBUF_NONCONST
	/* This iconv() wants a plain char ** for the input buffer */
	const size_t converted = iconv(cd, (char **)inbuf, inbytesleft, outbuf, outbytesleft);
#else
	const size_t converted = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
#endif

	if ( converted != (size_t)-1 ) {
		return converted;
	}

	/* The call failed: report why in SDL's own terms */
	if ( errno == E2BIG ) {
		return SDL_ICONV_E2BIG;
	}
	if ( errno == EILSEQ ) {
		return SDL_ICONV_EILSEQ;
	}
	if ( errno == EINVAL ) {
		return SDL_ICONV_EINVAL;
	}
	return SDL_ICONV_ERROR;
}
slouken@1501
    66
slouken@1501
    67
#else
slouken@1501
    68
slouken@1503
    69
/* Lots of useful information on Unicode at:
slouken@1503
    70
	http://www.cl.cam.ac.uk/~mgk25/unicode.html
slouken@1503
    71
*/
slouken@1503
    72
slouken@1501
    73
#define UNICODE_BOM	0xFEFF
slouken@1501
    74
slouken@1501
    75
#define UNKNOWN_ASCII	'?'
slouken@1501
    76
#define UNKNOWN_UNICODE	0xFFFD
slouken@1501
    77
slouken@1501
    78
/* Identifiers for the encodings handled by the built-in converter.
 * ENCODING_UNKNOWN doubles as the "no match" marker in SDL_iconv_open().
 */
enum {
	ENCODING_UNKNOWN = 0,
	ENCODING_ASCII   = 1,
	ENCODING_LATIN1  = 2,
	ENCODING_UTF8    = 3,
	ENCODING_UTF16   = 4,	/* Needs byte order marker */
	ENCODING_UTF16BE = 5,
	ENCODING_UTF16LE = 6,
	ENCODING_UTF32   = 7,	/* Needs byte order marker */
	ENCODING_UTF32BE = 8,
	ENCODING_UTF32LE = 9,
	ENCODING_UCS2    = 10,	/* Native byte order assumed */
	ENCODING_UCS4    = 11	/* Native byte order assumed */
};
slouken@1501
    92
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
slouken@1501
    93
#define ENCODING_UTF16NATIVE	ENCODING_UTF16BE
slouken@1501
    94
#define ENCODING_UTF32NATIVE	ENCODING_UTF32BE
slouken@1501
    95
#else
slouken@1501
    96
#define ENCODING_UTF16NATIVE	ENCODING_UTF16LE
slouken@1501
    97
#define ENCODING_UTF32NATIVE	ENCODING_UTF32LE
slouken@1501
    98
#endif
slouken@1501
    99
slouken@1501
   100
/* Conversion descriptor used by the built-in converter: just the pair of
 * ENCODING_* identifiers chosen in SDL_iconv_open().
 */
struct _SDL_iconv_t
{
	int src_fmt;	/* encoding of the input bytes */
	int dst_fmt;	/* encoding to produce */
};
slouken@1501
   105
slouken@1501
   106
static struct {
slouken@1501
   107
	const char *name;
slouken@1501
   108
	int format;
slouken@1501
   109
} encodings[] = {
slouken@1501
   110
	{ "ASCII",	ENCODING_ASCII },
slouken@1501
   111
	{ "US-ASCII",	ENCODING_ASCII },
slouken@3997
   112
	{ "8859-1",	ENCODING_LATIN1 },
slouken@1501
   113
	{ "ISO-8859-1",	ENCODING_LATIN1 },
slouken@1501
   114
	{ "UTF8",	ENCODING_UTF8 },
slouken@1501
   115
	{ "UTF-8",	ENCODING_UTF8 },
slouken@1501
   116
	{ "UTF16",	ENCODING_UTF16 },
slouken@1501
   117
	{ "UTF-16",	ENCODING_UTF16 },
slouken@1501
   118
	{ "UTF16BE",	ENCODING_UTF16BE },
slouken@1501
   119
	{ "UTF-16BE",	ENCODING_UTF16BE },
slouken@1501
   120
	{ "UTF16LE",	ENCODING_UTF16LE },
slouken@1501
   121
	{ "UTF-16LE",	ENCODING_UTF16LE },
slouken@1501
   122
	{ "UTF32",	ENCODING_UTF32 },
slouken@1501
   123
	{ "UTF-32",	ENCODING_UTF32 },
slouken@1501
   124
	{ "UTF32BE",	ENCODING_UTF32BE },
slouken@1501
   125
	{ "UTF-32BE",	ENCODING_UTF32BE },
slouken@1501
   126
	{ "UTF32LE",	ENCODING_UTF32LE },
slouken@1501
   127
	{ "UTF-32LE",	ENCODING_UTF32LE },
slouken@1501
   128
	{ "UCS2",	ENCODING_UCS2 },
slouken@1501
   129
	{ "UCS-2",	ENCODING_UCS2 },
slouken@1501
   130
	{ "UCS4",	ENCODING_UCS4 },
slouken@1501
   131
	{ "UCS-4",	ENCODING_UCS4 },
slouken@1501
   132
};
slouken@1501
   133
slouken@4053
   134
/* Work out which character set the environment asks for.
 *
 * The first non-empty value among LC_ALL, LC_CTYPE, LC_MESSAGES and LANG
 * is used.  If none is set, or the value is "C", "ASCII" is returned.
 * Locale names have the form "language_TERRITORY.codeset", so when a '.'
 * followed by more text is present, only the part after it is returned;
 * that is the piece SDL_iconv_open() can match against its encoding names.
 * A trailing "@modifier" is not stripped.
 *
 * The returned pointer is either a string literal or points into the
 * string returned by SDL_getenv(); the caller must not free it.
 */
static const char *getlocale(void)
{
	const char *lang;
	const char *p;

	/* An empty variable is treated like an unset one */
	lang = SDL_getenv("LC_ALL");
	if ( !lang || !*lang ) {
		lang = SDL_getenv("LC_CTYPE");
	}
	if ( !lang || !*lang ) {
		lang = SDL_getenv("LC_MESSAGES");
	}
	if ( !lang || !*lang ) {
		lang = SDL_getenv("LANG");
	}
	if ( !lang || !*lang || SDL_strcmp(lang, "C") == 0 ) {
		return "ASCII";
	}

	/* Skip the "language_TERRITORY." prefix, if there is one */
	for ( p = lang; *p; ++p ) {
		if ( *p == '.' && p[1] ) {
			return p + 1;
		}
	}
	return lang;
}
slouken@4053
   153
slouken@1501
   154
SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
slouken@1501
   155
{
slouken@1501
   156
	int src_fmt = ENCODING_UNKNOWN;
slouken@1501
   157
	int dst_fmt = ENCODING_UNKNOWN;
slouken@1501
   158
	int i;
slouken@1501
   159
slouken@4053
   160
	if ( !fromcode || !*fromcode ) {
slouken@4053
   161
		fromcode = getlocale();
slouken@4053
   162
	}
slouken@4053
   163
	if ( !tocode || !*tocode ) {
slouken@4053
   164
		fromcode = getlocale();
slouken@4053
   165
	}
slouken@1501
   166
	for ( i = 0; i < SDL_arraysize(encodings); ++i ) {
slouken@1501
   167
		if ( SDL_strcasecmp(fromcode, encodings[i].name) == 0 ) {
slouken@1501
   168
			src_fmt = encodings[i].format;
slouken@1501
   169
			if ( dst_fmt != ENCODING_UNKNOWN ) {
slouken@1501
   170
				break;
slouken@1501
   171
			}
slouken@1501
   172
		}
slouken@1501
   173
		if ( SDL_strcasecmp(tocode, encodings[i].name) == 0 ) {
slouken@1501
   174
			dst_fmt = encodings[i].format;
slouken@1501
   175
			if ( src_fmt != ENCODING_UNKNOWN ) {
slouken@1501
   176
				break;
slouken@1501
   177
			}
slouken@1501
   178
		}
slouken@1501
   179
	}
slouken@1501
   180
	if ( src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN ) {
slouken@1501
   181
		SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd));
slouken@1501
   182
		if ( cd ) {
slouken@1501
   183
			cd->src_fmt = src_fmt;
slouken@1501
   184
			cd->dst_fmt = dst_fmt;
slouken@1501
   185
			return cd;
slouken@1501
   186
		}
slouken@1501
   187
	}
slouken@1501
   188
	return (SDL_iconv_t)-1;
slouken@1501
   189
}
slouken@1501
   190
slouken@1501
   191
/* Built-in replacement for iconv(): convert characters from *inbuf to
 * *outbuf, going through a UCS-4 code point for every character.
 *
 * cd            descriptor from SDL_iconv_open(); its src_fmt / dst_fmt are
 *               rewritten to a concrete byte order the first time a
 *               generic UTF-16 / UTF-32 encoding is seen.
 * inbuf         in/out: current read position.
 * inbytesleft   in/out: bytes remaining at *inbuf (NULL is treated as 0).
 * outbuf        in/out: current write position.
 * outbytesleft  in/out: bytes of space remaining at *outbuf.
 *
 * Returns the number of characters converted, SDL_ICONV_E2BIG when the
 * output has no room for the next character, or SDL_ICONV_EINVAL when the
 * input ends in the middle of a character.  Illegal input sequences are
 * not reported as errors; they are replaced by U+FFFD (or '?' for ASCII
 * and Latin-1 output).  The four in/out values are updated after every
 * fully converted character, so on an error return they describe the
 * state just after the last character that was converted.
 */
size_t SDL_iconv(SDL_iconv_t cd,
                 const char **inbuf, size_t *inbytesleft,
                 char **outbuf, size_t *outbytesleft)
{
	/* For simplicity, we'll convert everything to and from UCS-4 */
	const char *src;
	char *dst;
	size_t srclen, dstlen;
	Uint32 ch = 0;
	size_t total;

	if ( !inbuf || !*inbuf ) {
		/* Reset the context */
		return 0;
	}
	if ( !outbuf || !*outbuf || !outbytesleft || !*outbytesleft ) {
		return SDL_ICONV_E2BIG;
	}
	src = *inbuf;
	srclen = (inbytesleft ? *inbytesleft : 0);
	dst = *outbuf;
	dstlen = *outbytesleft;

	switch ( cd->src_fmt ) {
	    case ENCODING_UTF16:
		/* Scan for a byte order marker */
		{
			const Uint8 *p = (const Uint8 *)src;
			size_t n = srclen / 2;
			while ( n ) {
				if ( p[0] == 0xFF && p[1] == 0xFE ) {
					/* U+FEFF stored low byte first */
					cd->src_fmt = ENCODING_UTF16LE;
					break;
				} else if ( p[0] == 0xFE && p[1] == 0xFF ) {
					/* U+FEFF stored high byte first */
					cd->src_fmt = ENCODING_UTF16BE;
					break;
				}
				p += 2;
				--n;
			}
			if ( n == 0 ) {
				/* We can't tell, default to host order */
				cd->src_fmt = ENCODING_UTF16NATIVE;
			}
		}
		break;
	    case ENCODING_UTF32:
		/* Scan for a byte order marker */
		{
			const Uint8 *p = (const Uint8 *)src;
			size_t n = srclen / 4;
			while ( n ) {
				if ( p[0] == 0xFF && p[1] == 0xFE &&
				     p[2] == 0x00 && p[3] == 0x00 ) {
					/* U+0000FEFF stored low byte first */
					cd->src_fmt = ENCODING_UTF32LE;
					break;
				} else if ( p[0] == 0x00 && p[1] == 0x00 &&
				            p[2] == 0xFE && p[3] == 0xFF ) {
					/* U+0000FEFF stored high byte first */
					cd->src_fmt = ENCODING_UTF32BE;
					break;
				}
				p += 4;
				--n;
			}
			if ( n == 0 ) {
				/* We can't tell, default to host order */
				cd->src_fmt = ENCODING_UTF32NATIVE;
			}
		}
		break;
	    default:
		break;
	}

	/* NOTE(review): the marker written below only becomes visible to the
	   caller once a character has been converted, because *outbuf is
	   updated inside the main loop only. */
	switch ( cd->dst_fmt ) {
	    case ENCODING_UTF16:
		/* Default to host order, need to add byte order marker */
		if ( dstlen < 2 ) {
			return SDL_ICONV_E2BIG;
		}
		/* NOTE(review): assumes dst is suitably aligned for Uint16 */
		*(Uint16 *)dst = UNICODE_BOM;
		dst += 2;
		dstlen -= 2;
		cd->dst_fmt = ENCODING_UTF16NATIVE;
		break;
	    case ENCODING_UTF32:
		/* Default to host order, need to add byte order marker */
		if ( dstlen < 4 ) {
			return SDL_ICONV_E2BIG;
		}
		/* NOTE(review): assumes dst is suitably aligned for Uint32 */
		*(Uint32 *)dst = UNICODE_BOM;
		dst += 4;
		dstlen -= 4;
		cd->dst_fmt = ENCODING_UTF32NATIVE;
		break;
	    default:
		break;
	}

	total = 0;
	while ( srclen > 0 ) {
		/* Decode a character */
		switch ( cd->src_fmt ) {
		    case ENCODING_ASCII:
			{
				const Uint8 *p = (const Uint8 *)src;
				/* The high bit is simply discarded */
				ch = (Uint32)(p[0] & 0x7F);
				++src;
				--srclen;
			}
			break;
		    case ENCODING_LATIN1:
			{
				const Uint8 *p = (const Uint8 *)src;
				ch = (Uint32)p[0];
				++src;
				--srclen;
			}
			break;
		    case ENCODING_UTF8: /* RFC 3629 */
			{
				const Uint8 *p = (const Uint8 *)src;
				size_t left = 0;	/* continuation bytes expected */
				SDL_bool overlong = SDL_FALSE;
				/* A multi-byte lead with all payload bits zero is only
				   overlong if the next byte's high payload bits are
				   zero too, so peek at p[1] when it is available. */
				if ( p[0] >= 0xFC ) {
					if ( (p[0] & 0xFE) != 0xFC ) {
						/* Illegal lead byte: substitute, don't fail */
						ch = UNKNOWN_UNICODE;
					} else {
						if ( p[0] == 0xFC && srclen > 1 &&
						     (p[1] & 0xFC) == 0x80 ) {
							overlong = SDL_TRUE;
						}
						ch = (Uint32)(p[0] & 0x01);
						left = 5;
					}
				} else if ( p[0] >= 0xF8 ) {
					if ( (p[0] & 0xFC) != 0xF8 ) {
						/* Illegal lead byte: substitute, don't fail */
						ch = UNKNOWN_UNICODE;
					} else {
						if ( p[0] == 0xF8 && srclen > 1 &&
						     (p[1] & 0xF8) == 0x80 ) {
							overlong = SDL_TRUE;
						}
						ch = (Uint32)(p[0] & 0x03);
						left = 4;
					}
				} else if ( p[0] >= 0xF0 ) {
					if ( (p[0] & 0xF8) != 0xF0 ) {
						/* Illegal lead byte: substitute, don't fail */
						ch = UNKNOWN_UNICODE;
					} else {
						if ( p[0] == 0xF0 && srclen > 1 &&
						     (p[1] & 0xF0) == 0x80 ) {
							overlong = SDL_TRUE;
						}
						ch = (Uint32)(p[0] & 0x07);
						left = 3;
					}
				} else if ( p[0] >= 0xE0 ) {
					if ( (p[0] & 0xF0) != 0xE0 ) {
						/* Illegal lead byte: substitute, don't fail */
						ch = UNKNOWN_UNICODE;
					} else {
						if ( p[0] == 0xE0 && srclen > 1 &&
						     (p[1] & 0xE0) == 0x80 ) {
							overlong = SDL_TRUE;
						}
						ch = (Uint32)(p[0] & 0x0F);
						left = 2;
					}
				} else if ( p[0] >= 0xC0 ) {
					if ( (p[0] & 0xE0) != 0xC0 ) {
						/* Illegal lead byte: substitute, don't fail */
						ch = UNKNOWN_UNICODE;
					} else {
						/* Only 0xC0 and 0xC1 encode values below 0x80 */
						if ( (p[0] & 0xDE) == 0xC0 ) {
							overlong = SDL_TRUE;
						}
						ch = (Uint32)(p[0] & 0x1F);
						left = 1;
					}
				} else {
					if ( (p[0] & 0x80) != 0x00 ) {
						/* Stray continuation byte: substitute */
						ch = UNKNOWN_UNICODE;
					} else {
						ch = (Uint32)p[0];
					}
				}
				++src;
				--srclen;
				if ( srclen < left ) {
					/* Input ends in the middle of the sequence */
					return SDL_ICONV_EINVAL;
				}
				while ( left-- ) {
					++p;
					if ( (p[0] & 0xC0) != 0x80 ) {
						/* Not a continuation byte: substitute and
						   leave it to be decoded on its own */
						ch = UNKNOWN_UNICODE;
						break;
					}
					ch <<= 6;
					ch |= (p[0] & 0x3F);
					++src;
					--srclen;
				}
				if ( overlong ) {
					/* Potential security risk: substitute */
					ch = UNKNOWN_UNICODE;
				}
				if ( (ch >= 0xD800 && ch <= 0xDFFF) ||
				     (ch == 0xFFFE || ch == 0xFFFF) ||
				     ch > 0x10FFFF ) {
					/* Surrogates, non-characters and out-of-range
					   values are not accepted from UTF-8 */
					ch = UNKNOWN_UNICODE;
				}
			}
			break;
		    case ENCODING_UTF16BE: /* RFC 2781 */
			{
				const Uint8 *p = (const Uint8 *)src;
				Uint16 W1, W2;
				if ( srclen < 2 ) {
					return SDL_ICONV_EINVAL;
				}
				W1 = ((Uint16)p[0] << 8) |
				      (Uint16)p[1];
				src += 2;
				srclen -= 2;
				if ( W1 < 0xD800 || W1 > 0xDFFF ) {
					/* Not a surrogate: the unit is the code point */
					ch = (Uint32)W1;
					break;
				}
				if ( W1 > 0xDBFF ) {
					/* Low surrogate without a preceding high one */
					ch = UNKNOWN_UNICODE;
					break;
				}
				if ( srclen < 2 ) {
					return SDL_ICONV_EINVAL;
				}
				p = (const Uint8 *)src;
				W2 = ((Uint16)p[0] << 8) |
				      (Uint16)p[1];
				src += 2;
				srclen -= 2;
				if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
					/* High surrogate not followed by a low one */
					ch = UNKNOWN_UNICODE;
					break;
				}
				ch = (((Uint32)(W1 & 0x3FF) << 10) |
				      (Uint32)(W2 & 0x3FF)) + 0x10000;
			}
			break;
		    case ENCODING_UTF16LE: /* RFC 2781 */
			{
				const Uint8 *p = (const Uint8 *)src;
				Uint16 W1, W2;
				if ( srclen < 2 ) {
					return SDL_ICONV_EINVAL;
				}
				W1 = ((Uint16)p[1] << 8) |
				      (Uint16)p[0];
				src += 2;
				srclen -= 2;
				if ( W1 < 0xD800 || W1 > 0xDFFF ) {
					/* Not a surrogate: the unit is the code point */
					ch = (Uint32)W1;
					break;
				}
				if ( W1 > 0xDBFF ) {
					/* Low surrogate without a preceding high one */
					ch = UNKNOWN_UNICODE;
					break;
				}
				if ( srclen < 2 ) {
					return SDL_ICONV_EINVAL;
				}
				p = (const Uint8 *)src;
				W2 = ((Uint16)p[1] << 8) |
				      (Uint16)p[0];
				src += 2;
				srclen -= 2;
				if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
					/* High surrogate not followed by a low one */
					ch = UNKNOWN_UNICODE;
					break;
				}
				ch = (((Uint32)(W1 & 0x3FF) << 10) |
				      (Uint32)(W2 & 0x3FF)) + 0x10000;
			}
			break;
		    case ENCODING_UTF32BE:
			{
				const Uint8 *p = (const Uint8 *)src;
				if ( srclen < 4 ) {
					return SDL_ICONV_EINVAL;
				}
				ch = ((Uint32)p[0] << 24) |
				     ((Uint32)p[1] << 16) |
				     ((Uint32)p[2] << 8) |
				      (Uint32)p[3];
				src += 4;
				srclen -= 4;
			}
			break;
		    case ENCODING_UTF32LE:
			{
				const Uint8 *p = (const Uint8 *)src;
				if ( srclen < 4 ) {
					return SDL_ICONV_EINVAL;
				}
				ch = ((Uint32)p[3] << 24) |
				     ((Uint32)p[2] << 16) |
				     ((Uint32)p[1] << 8) |
				      (Uint32)p[0];
				src += 4;
				srclen -= 4;
			}
			break;
		    case ENCODING_UCS2:
			{
				/* NOTE(review): assumes src is aligned for Uint16 */
				const Uint16 *p = (const Uint16 *)src;
				if ( srclen < 2 ) {
					return SDL_ICONV_EINVAL;
				}
				ch = *p;
				src += 2;
				srclen -= 2;
			}
			break;
		    case ENCODING_UCS4:
			{
				/* NOTE(review): assumes src is aligned for Uint32 */
				const Uint32 *p = (const Uint32 *)src;
				if ( srclen < 4 ) {
					return SDL_ICONV_EINVAL;
				}
				ch = *p;
				src += 4;
				srclen -= 4;
			}
			break;
		}

		/* Encode a character */
		switch ( cd->dst_fmt ) {
		    case ENCODING_ASCII:
			{
				Uint8 *p = (Uint8 *)dst;
				if ( dstlen < 1 ) {
					return SDL_ICONV_E2BIG;
				}
				if ( ch > 0x7F ) {
					*p = UNKNOWN_ASCII;
				} else {
					*p = (Uint8)ch;
				}
				++dst;
				--dstlen;
			}
			break;
		    case ENCODING_LATIN1:
			{
				Uint8 *p = (Uint8 *)dst;
				if ( dstlen < 1 ) {
					return SDL_ICONV_E2BIG;
				}
				if ( ch > 0xFF ) {
					*p = UNKNOWN_ASCII;
				} else {
					*p = (Uint8)ch;
				}
				++dst;
				--dstlen;
			}
			break;
		    case ENCODING_UTF8: /* RFC 3629 */
			{
				Uint8 *p = (Uint8 *)dst;
				if ( ch > 0x10FFFF ) {
					ch = UNKNOWN_UNICODE;
				}
				/* After the clamp above only the first four branches
				   can be taken; the longer forms are kept as-is. */
				if ( ch <= 0x7F ) {
					if ( dstlen < 1 ) {
						return SDL_ICONV_E2BIG;
					}
					*p = (Uint8)ch;
					++dst;
					--dstlen;
				} else if ( ch <= 0x7FF ) {
					if ( dstlen < 2 ) {
						return SDL_ICONV_E2BIG;
					}
					p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F);
					p[1] = 0x80 | (Uint8)(ch & 0x3F);
					dst += 2;
					dstlen -= 2;
				} else if ( ch <= 0xFFFF ) {
					if ( dstlen < 3 ) {
						return SDL_ICONV_E2BIG;
					}
					p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F);
					p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
					p[2] = 0x80 | (Uint8)(ch & 0x3F);
					dst += 3;
					dstlen -= 3;
				} else if ( ch <= 0x1FFFFF ) {
					if ( dstlen < 4 ) {
						return SDL_ICONV_E2BIG;
					}
					p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07);
					p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
					p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
					p[3] = 0x80 | (Uint8)(ch & 0x3F);
					dst += 4;
					dstlen -= 4;
				} else if ( ch <= 0x3FFFFFF ) {
					if ( dstlen < 5 ) {
						return SDL_ICONV_E2BIG;
					}
					p[0] = 0xF8 | (Uint8)((ch >> 24) & 0x03);
					p[1] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
					p[2] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
					p[3] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
					p[4] = 0x80 | (Uint8)(ch & 0x3F);
					dst += 5;
					dstlen -= 5;
				} else {
					if ( dstlen < 6 ) {
						return SDL_ICONV_E2BIG;
					}
					p[0] = 0xFC | (Uint8)((ch >> 30) & 0x01);
					p[1] = 0x80 | (Uint8)((ch >> 24) & 0x3F);
					p[2] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
					p[3] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
					p[4] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
					p[5] = 0x80 | (Uint8)(ch & 0x3F);
					dst += 6;
					dstlen -= 6;
				}
			}
			break;
		    case ENCODING_UTF16BE: /* RFC 2781 */
			{
				Uint8 *p = (Uint8 *)dst;
				if ( ch > 0x10FFFF ) {
					ch = UNKNOWN_UNICODE;
				}
				if ( ch < 0x10000 ) {
					if ( dstlen < 2 ) {
						return SDL_ICONV_E2BIG;
					}
					p[0] = (Uint8)(ch >> 8);
					p[1] = (Uint8)ch;
					dst += 2;
					dstlen -= 2;
				} else {
					/* Needs a surrogate pair */
					Uint16 W1, W2;
					if ( dstlen < 4 ) {
						return SDL_ICONV_E2BIG;
					}
					ch = ch - 0x10000;
					W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
					W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
					p[0] = (Uint8)(W1 >> 8);
					p[1] = (Uint8)W1;
					p[2] = (Uint8)(W2 >> 8);
					p[3] = (Uint8)W2;
					dst += 4;
					dstlen -= 4;
				}
			}
			break;
		    case ENCODING_UTF16LE: /* RFC 2781 */
			{
				Uint8 *p = (Uint8 *)dst;
				if ( ch > 0x10FFFF ) {
					ch = UNKNOWN_UNICODE;
				}
				if ( ch < 0x10000 ) {
					if ( dstlen < 2 ) {
						return SDL_ICONV_E2BIG;
					}
					p[1] = (Uint8)(ch >> 8);
					p[0] = (Uint8)ch;
					dst += 2;
					dstlen -= 2;
				} else {
					/* Needs a surrogate pair */
					Uint16 W1, W2;
					if ( dstlen < 4 ) {
						return SDL_ICONV_E2BIG;
					}
					ch = ch - 0x10000;
					W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
					W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
					p[1] = (Uint8)(W1 >> 8);
					p[0] = (Uint8)W1;
					p[3] = (Uint8)(W2 >> 8);
					p[2] = (Uint8)W2;
					dst += 4;
					dstlen -= 4;
				}
			}
			break;
		    case ENCODING_UTF32BE:
			{
				Uint8 *p = (Uint8 *)dst;
				if ( ch > 0x10FFFF ) {
					ch = UNKNOWN_UNICODE;
				}
				if ( dstlen < 4 ) {
					return SDL_ICONV_E2BIG;
				}
				p[0] = (Uint8)(ch >> 24);
				p[1] = (Uint8)(ch >> 16);
				p[2] = (Uint8)(ch >> 8);
				p[3] = (Uint8)ch;
				dst += 4;
				dstlen -= 4;
			}
			break;
		    case ENCODING_UTF32LE:
			{
				Uint8 *p = (Uint8 *)dst;
				if ( ch > 0x10FFFF ) {
					ch = UNKNOWN_UNICODE;
				}
				if ( dstlen < 4 ) {
					return SDL_ICONV_E2BIG;
				}
				p[3] = (Uint8)(ch >> 24);
				p[2] = (Uint8)(ch >> 16);
				p[1] = (Uint8)(ch >> 8);
				p[0] = (Uint8)ch;
				dst += 4;
				dstlen -= 4;
			}
			break;
		    case ENCODING_UCS2:
			{
				/* NOTE(review): assumes dst is aligned for Uint16 */
				Uint16 *p = (Uint16 *)dst;
				if ( ch > 0xFFFF ) {
					ch = UNKNOWN_UNICODE;
				}
				if ( dstlen < 2 ) {
					return SDL_ICONV_E2BIG;
				}
				*p = (Uint16)ch;
				dst += 2;
				dstlen -= 2;
			}
			break;
		    case ENCODING_UCS4:
			{
				/* NOTE(review): assumes dst is aligned for Uint32 */
				Uint32 *p = (Uint32 *)dst;
				if ( ch > 0x7FFFFFFF ) {
					ch = UNKNOWN_UNICODE;
				}
				if ( dstlen < 4 ) {
					return SDL_ICONV_E2BIG;
				}
				*p = ch;
				dst += 4;
				dstlen -= 4;
			}
			break;
		}

		/* Update state.  inbytesleft cannot be NULL here: the loop is
		   only entered when srclen, read from it, is non-zero. */
		*inbuf = src;
		*inbytesleft = srclen;
		*outbuf = dst;
		*outbytesleft = dstlen;
		++total;
	}
	return total;
}
slouken@1501
   788
slouken@1501
   789
/* Release a conversion descriptor.
   A null descriptor and the (SDL_iconv_t)-1 value are ignored rather than
   freed.  Always returns 0.
 */
int SDL_iconv_close(SDL_iconv_t cd)
{
	/* Nothing to release for a null or (SDL_iconv_t)-1 descriptor */
	if ( !cd || cd == (SDL_iconv_t)-1 ) {
		return 0;
	}

	SDL_free(cd);
	return 0;
}
slouken@1501
   796
slouken@1501
   797
#endif /* !HAVE_ICONV */
slouken@1501
   798
icculus@3918
   799
char *SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft)
slouken@1501
   800
{
slouken@1501
   801
	SDL_iconv_t cd;
slouken@1501
   802
	char *string;
slouken@1501
   803
	size_t stringsize;
slouken@1501
   804
	char *outbuf;
slouken@1501
   805
	size_t outbytesleft;
slouken@1501
   806
	size_t retCode = 0;
slouken@1501
   807
slouken@1501
   808
	cd = SDL_iconv_open(tocode, fromcode);
slouken@1501
   809
	if ( cd == (SDL_iconv_t)-1 ) {
slouken@4054
   810
		/* See if we can recover here (fixes iconv on Solaris 11) */
slouken@4054
   811
		if ( !tocode || !*tocode ) {
slouken@4054
   812
			tocode = "UTF-8";
slouken@4054
   813
		}
slouken@4054
   814
		if ( !fromcode || !*fromcode ) {
slouken@4054
   815
			tocode = "UTF-8";
slouken@4054
   816
		}
slouken@4054
   817
		cd = SDL_iconv_open(tocode, fromcode);
slouken@4054
   818
	}
slouken@4054
   819
	if ( cd == (SDL_iconv_t)-1 ) {
slouken@1501
   820
		return NULL;
slouken@1501
   821
	}
slouken@1501
   822
slouken@1501
   823
	stringsize = inbytesleft > 4 ? inbytesleft : 4;
slouken@1501
   824
	string = SDL_malloc(stringsize);
slouken@1501
   825
	if ( !string ) {
slouken@1501
   826
		SDL_iconv_close(cd);
slouken@1501
   827
		return NULL;
slouken@1501
   828
	}
slouken@1501
   829
	outbuf = string;
slouken@1501
   830
	outbytesleft = stringsize;
slouken@1501
   831
	SDL_memset(outbuf, 0, 4);
slouken@1501
   832
slouken@1501
   833
	while ( inbytesleft > 0 ) {
slouken@1501
   834
		retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
slouken@1501
   835
		switch (retCode) {
slouken@1501
   836
		    case SDL_ICONV_E2BIG:
slouken@1501
   837
			{
slouken@1501
   838
				char *oldstring = string;
slouken@1501
   839
				stringsize *= 2;
slouken@1501
   840
				string = SDL_realloc(string, stringsize);
slouken@1501
   841
				if ( !string ) {
slouken@1501
   842
					SDL_iconv_close(cd);
slouken@1501
   843
					return NULL;
slouken@1501
   844
				}
slouken@1501
   845
				outbuf = string + (outbuf - oldstring);
slouken@1501
   846
				outbytesleft = stringsize - (outbuf - string);
slouken@1501
   847
				SDL_memset(outbuf, 0, 4);
slouken@1501
   848
			}
slouken@1501
   849
			break;
slouken@1501
   850
		    case SDL_ICONV_EILSEQ:
slouken@1501
   851
			/* Try skipping some input data - not perfect, but... */
slouken@1501
   852
			++inbuf;
slouken@1501
   853
			--inbytesleft;
slouken@1501
   854
			break;
slouken@1501
   855
		    case SDL_ICONV_EINVAL:
slouken@1501
   856
		    case SDL_ICONV_ERROR:
slouken@1501
   857
			/* We can't continue... */
slouken@1501
   858
			inbytesleft = 0;
slouken@1501
   859
			break;
slouken@1501
   860
		}
slouken@1501
   861
	}
slouken@1501
   862
	SDL_iconv_close(cd);
slouken@1501
   863
slouken@1501
   864
	return string;
slouken@1501
   865
}