src/stdlib/SDL_iconv.c
author Sam Lantinga <slouken@libsdl.org>
Thu, 12 Jul 2007 07:55:18 +0000
changeset 2183 9f31740cad2e
parent 2182 cc2597da0840
child 2184 8f8516e79a13
permissions -rw-r--r--
Whoops, needed to remove the other version of getlocale()
slouken@1501
     1
/*
slouken@1501
     2
    SDL - Simple DirectMedia Layer
slouken@1501
     3
    Copyright (C) 1997-2006 Sam Lantinga
slouken@1501
     4
slouken@1501
     5
    This library is free software; you can redistribute it and/or
slouken@1501
     6
    modify it under the terms of the GNU Lesser General Public
slouken@1501
     7
    License as published by the Free Software Foundation; either
slouken@1501
     8
    version 2.1 of the License, or (at your option) any later version.
slouken@1501
     9
slouken@1501
    10
    This library is distributed in the hope that it will be useful,
slouken@1501
    11
    but WITHOUT ANY WARRANTY; without even the implied warranty of
slouken@1501
    12
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
slouken@1501
    13
    Lesser General Public License for more details.
slouken@1501
    14
slouken@1501
    15
    You should have received a copy of the GNU Lesser General Public
slouken@1501
    16
    License along with this library; if not, write to the Free Software
slouken@1501
    17
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
slouken@1501
    18
slouken@1501
    19
    Sam Lantinga
slouken@1501
    20
    slouken@libsdl.org
slouken@1501
    21
*/
slouken@1501
    22
#include "SDL_config.h"
slouken@1501
    23
slouken@1501
    24
/* This file contains portable iconv functions for SDL */
slouken@1501
    25
slouken@1501
    26
#include "SDL_stdinc.h"
slouken@1501
    27
#include "SDL_endian.h"
slouken@1501
    28
slouken@1501
    29
#ifdef HAVE_ICONV
slouken@1501
    30
slouken@2136
    31
/* Depending on which standard the iconv() was implemented with,
slouken@2136
    32
   iconv() may or may not use const char ** for the inbuf param.
slouken@2136
    33
   If we get this wrong, it's just a warning, so no big deal.
slouken@2136
    34
*/
slouken@2136
    35
#if defined(_XGP6) || \
slouken@2136
    36
    defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2))
slouken@2136
    37
#define ICONV_INBUF_NONCONST
slouken@2136
    38
#endif
slouken@2136
    39
slouken@1501
    40
#include <errno.h>
slouken@1501
    41
slouken@1895
    42
size_t
slouken@1895
    43
SDL_iconv(SDL_iconv_t cd,
slouken@2135
    44
          const char **inbuf, size_t * inbytesleft,
slouken@1895
    45
          char **outbuf, size_t * outbytesleft)
slouken@1501
    46
{
slouken@2135
    47
    size_t retCode;
slouken@2136
    48
#ifdef ICONV_INBUF_NONCONST
slouken@2136
    49
    retCode = iconv(cd, (char **) inbuf, inbytesleft, outbuf, outbytesleft);
slouken@2135
    50
#else
slouken@2136
    51
    retCode = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
slouken@2135
    52
#endif
slouken@1895
    53
    if (retCode == (size_t) - 1) {
slouken@1895
    54
        switch (errno) {
slouken@1895
    55
        case E2BIG:
slouken@1895
    56
            return SDL_ICONV_E2BIG;
slouken@1895
    57
        case EILSEQ:
slouken@1895
    58
            return SDL_ICONV_EILSEQ;
slouken@1895
    59
        case EINVAL:
slouken@1895
    60
            return SDL_ICONV_EINVAL;
slouken@1895
    61
        default:
slouken@1895
    62
            return SDL_ICONV_ERROR;
slouken@1895
    63
        }
slouken@1895
    64
    }
slouken@1895
    65
    return retCode;
slouken@1501
    66
}
slouken@1501
    67
slouken@1501
    68
#else
slouken@1501
    69
slouken@1503
    70
/* Lots of useful information on Unicode at:
slouken@1503
    71
	http://www.cl.cam.ac.uk/~mgk25/unicode.html
slouken@1503
    72
*/
slouken@1503
    73
slouken@1501
    74
/* Byte order marker code point, written in front of UTF-16/UTF-32 output
   when the destination encoding does not name a byte order. */
#define UNICODE_BOM	0xFEFF

/* Substitutes used when a character is invalid or cannot be represented:
   '?' for the 8-bit destinations, U+FFFD for decoded Unicode values. */
#define UNKNOWN_ASCII	'?'
#define UNKNOWN_UNICODE	0xFFFD
slouken@1501
    78
slouken@1895
    79
/* Identifiers for the encodings the built-in converter understands.
   These are the values stored in a descriptor's src_fmt and dst_fmt. */
enum
{
    ENCODING_UNKNOWN,           /* Name not found in the encodings table */
    ENCODING_ASCII,
    ENCODING_LATIN1,
    ENCODING_UTF8,
    ENCODING_UTF16,             /* Needs byte order marker */
    ENCODING_UTF16BE,
    ENCODING_UTF16LE,
    ENCODING_UTF32,             /* Needs byte order marker */
    ENCODING_UTF32BE,
    ENCODING_UTF32LE,
    ENCODING_UCS2,              /* Native byte order assumed */
    ENCODING_UCS4,              /* Native byte order assumed */
};

/* Aliases for the UTF-16/UTF-32 variants that match the host byte order */
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
#define ENCODING_UTF16NATIVE	ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE	ENCODING_UTF32BE
#else
#define ENCODING_UTF16NATIVE	ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE	ENCODING_UTF32LE
#endif
slouken@1501
   101
slouken@1501
   102
/* Conversion descriptor for the built-in converter: the pair of encodings
   chosen by SDL_iconv_open(), stored as ENCODING_* values. */
struct _SDL_iconv_t
{
    int src_fmt;                /* Encoding the input is decoded from */
    int dst_fmt;                /* Encoding the output is encoded to */
};
slouken@1501
   107
slouken@1895
   108
static struct
slouken@1895
   109
{
slouken@1895
   110
    const char *name;
slouken@1895
   111
    int format;
slouken@1501
   112
} encodings[] = {
slouken@2142
   113
/* *INDENT-OFF* */
slouken@2142
   114
    { "ASCII", ENCODING_ASCII },
slouken@2142
   115
    { "US-ASCII", ENCODING_ASCII },
slouken@2142
   116
    { "8859-1", ENCODING_LATIN1 },
slouken@2142
   117
    { "ISO-8859-1", ENCODING_LATIN1 },
slouken@2142
   118
    { "UTF8", ENCODING_UTF8 },
slouken@2142
   119
    { "UTF-8", ENCODING_UTF8 },
slouken@2142
   120
    { "UTF16", ENCODING_UTF16 },
slouken@2142
   121
    { "UTF-16", ENCODING_UTF16 },
slouken@2142
   122
    { "UTF16BE", ENCODING_UTF16BE },
slouken@2142
   123
    { "UTF-16BE", ENCODING_UTF16BE },
slouken@2142
   124
    { "UTF16LE", ENCODING_UTF16LE },
slouken@2142
   125
    { "UTF-16LE", ENCODING_UTF16LE },
slouken@2142
   126
    { "UTF32", ENCODING_UTF32 },
slouken@2142
   127
    { "UTF-32", ENCODING_UTF32 },
slouken@2142
   128
    { "UTF32BE", ENCODING_UTF32BE },
slouken@2142
   129
    { "UTF-32BE", ENCODING_UTF32BE },
slouken@2142
   130
    { "UTF32LE", ENCODING_UTF32LE },
slouken@2142
   131
    { "UTF-32LE", ENCODING_UTF32LE },
slouken@2142
   132
    { "UCS2", ENCODING_UCS2 },
slouken@2142
   133
    { "UCS-2", ENCODING_UCS2 },
slouken@2142
   134
    { "UCS4", ENCODING_UCS4 },
slouken@2142
   135
    { "UCS-4", ENCODING_UCS4 },
slouken@2142
   136
/* *INDENT-ON* */
slouken@2142
   137
};
slouken@1501
   138
slouken@2182
   139
/*
 * Choose an encoding name from the environment, for callers that pass no
 * explicit encoding to SDL_iconv_open().
 *
 * LC_ALL, LC_CTYPE, LC_MESSAGES and LANG are consulted in that order; the
 * first one that is set to a non-empty value wins.  A variable that is set
 * but empty is treated as unset, so it no longer hides the variables after
 * it.
 *
 * Returns "ASCII" when nothing usable is set or the locale is "C" or
 * "POSIX".  Otherwise, for a value of the form "lang_TERRITORY.codeset",
 * returns the part after the first '.', which is the piece that can match
 * an entry in the encodings table (e.g. "en_US.UTF-8" yields "UTF-8").
 * A value without a '.' is returned unchanged.  Any trailing "@modifier"
 * is NOT removed, since that would need a writable copy of the string.
 *
 * The returned pointer is either a string literal or points into the
 * environment string returned by SDL_getenv(); the caller must not free it.
 */
static const char *
getlocale(void)
{
    static const char *const variables[] = {
        "LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LANG"
    };
    const char *lang = "";
    const char *p;
    size_t i;

    for (i = 0; i < sizeof(variables) / sizeof(variables[0]); ++i) {
        const char *value = SDL_getenv(variables[i]);
        if (value && *value) {
            lang = value;
            break;
        }
    }
    if (!*lang || SDL_strcmp(lang, "C") == 0 || SDL_strcmp(lang, "POSIX") == 0) {
        return "ASCII";
    }

    /* Skip the "language_TERRITORY." prefix, keeping only the codeset */
    for (p = lang; *p; ++p) {
        if (*p == '.') {
            if (p[1]) {
                lang = p + 1;
            }
            break;
        }
    }
    return lang;
}
slouken@2182
   159
slouken@1895
   160
SDL_iconv_t
slouken@1895
   161
SDL_iconv_open(const char *tocode, const char *fromcode)
slouken@1501
   162
{
slouken@1895
   163
    int src_fmt = ENCODING_UNKNOWN;
slouken@1895
   164
    int dst_fmt = ENCODING_UNKNOWN;
slouken@1895
   165
    int i;
slouken@1501
   166
slouken@2182
   167
    if (!fromcode || !*fromcode) {
slouken@2182
   168
        fromcode = getlocale();
slouken@2182
   169
    }
slouken@2182
   170
    if (!tocode || !*tocode) {
slouken@2182
   171
        fromcode = getlocale();
slouken@2182
   172
    }
slouken@1895
   173
    for (i = 0; i < SDL_arraysize(encodings); ++i) {
slouken@1895
   174
        if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) {
slouken@1895
   175
            src_fmt = encodings[i].format;
slouken@1895
   176
            if (dst_fmt != ENCODING_UNKNOWN) {
slouken@1895
   177
                break;
slouken@1895
   178
            }
slouken@1895
   179
        }
slouken@1895
   180
        if (SDL_strcasecmp(tocode, encodings[i].name) == 0) {
slouken@1895
   181
            dst_fmt = encodings[i].format;
slouken@1895
   182
            if (src_fmt != ENCODING_UNKNOWN) {
slouken@1895
   183
                break;
slouken@1895
   184
            }
slouken@1895
   185
        }
slouken@1895
   186
    }
slouken@1895
   187
    if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) {
slouken@1895
   188
        SDL_iconv_t cd = (SDL_iconv_t) SDL_malloc(sizeof(*cd));
slouken@1895
   189
        if (cd) {
slouken@1895
   190
            cd->src_fmt = src_fmt;
slouken@1895
   191
            cd->dst_fmt = dst_fmt;
slouken@1895
   192
            return cd;
slouken@1895
   193
        }
slouken@1895
   194
    }
slouken@1895
   195
    return (SDL_iconv_t) - 1;
slouken@1501
   196
}
slouken@1501
   197
slouken@1895
   198
size_t
slouken@1895
   199
SDL_iconv(SDL_iconv_t cd,
slouken@2135
   200
          const char **inbuf, size_t * inbytesleft,
slouken@1895
   201
          char **outbuf, size_t * outbytesleft)
slouken@1501
   202
{
slouken@1895
   203
    /* For simplicity, we'll convert everything to and from UCS-4 */
slouken@2135
   204
    const char *src;
slouken@2135
   205
    char *dst;
slouken@1895
   206
    size_t srclen, dstlen;
slouken@2135
   207
    Uint32 ch = 0;
slouken@1895
   208
    size_t total;
slouken@1501
   209
slouken@1895
   210
    if (!inbuf || !*inbuf) {
slouken@1895
   211
        /* Reset the context */
slouken@1895
   212
        return 0;
slouken@1895
   213
    }
slouken@1895
   214
    if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
slouken@1895
   215
        return SDL_ICONV_E2BIG;
slouken@1895
   216
    }
slouken@1895
   217
    src = *inbuf;
slouken@1895
   218
    srclen = (inbytesleft ? *inbytesleft : 0);
slouken@1895
   219
    dst = *outbuf;
slouken@1895
   220
    dstlen = *outbytesleft;
slouken@1501
   221
slouken@1895
   222
    switch (cd->src_fmt) {
slouken@1895
   223
    case ENCODING_UTF16:
slouken@1895
   224
        /* Scan for a byte order marker */
slouken@1895
   225
        {
slouken@1895
   226
            Uint8 *p = (Uint8 *) src;
slouken@1895
   227
            size_t n = srclen / 2;
slouken@1895
   228
            while (n) {
slouken@1895
   229
                if (p[0] == 0xFF && p[1] == 0xFE) {
slouken@1895
   230
                    cd->src_fmt = ENCODING_UTF16BE;
slouken@1895
   231
                    break;
slouken@1895
   232
                } else if (p[0] == 0xFE && p[1] == 0xFF) {
slouken@1895
   233
                    cd->src_fmt = ENCODING_UTF16LE;
slouken@1895
   234
                    break;
slouken@1895
   235
                }
slouken@1895
   236
                p += 2;
slouken@1895
   237
                --n;
slouken@1895
   238
            }
slouken@1895
   239
            if (n == 0) {
slouken@1895
   240
                /* We can't tell, default to host order */
slouken@1895
   241
                cd->src_fmt = ENCODING_UTF16NATIVE;
slouken@1895
   242
            }
slouken@1895
   243
        }
slouken@1895
   244
        break;
slouken@1895
   245
    case ENCODING_UTF32:
slouken@1895
   246
        /* Scan for a byte order marker */
slouken@1895
   247
        {
slouken@1895
   248
            Uint8 *p = (Uint8 *) src;
slouken@1895
   249
            size_t n = srclen / 4;
slouken@1895
   250
            while (n) {
slouken@1895
   251
                if (p[0] == 0xFF && p[1] == 0xFE &&
slouken@1895
   252
                    p[2] == 0x00 && p[3] == 0x00) {
slouken@1895
   253
                    cd->src_fmt = ENCODING_UTF32BE;
slouken@1895
   254
                    break;
slouken@1895
   255
                } else if (p[0] == 0x00 && p[1] == 0x00 &&
slouken@1895
   256
                           p[2] == 0xFE && p[3] == 0xFF) {
slouken@1895
   257
                    cd->src_fmt = ENCODING_UTF32LE;
slouken@1895
   258
                    break;
slouken@1895
   259
                }
slouken@1895
   260
                p += 4;
slouken@1895
   261
                --n;
slouken@1895
   262
            }
slouken@1895
   263
            if (n == 0) {
slouken@1895
   264
                /* We can't tell, default to host order */
slouken@1895
   265
                cd->src_fmt = ENCODING_UTF32NATIVE;
slouken@1895
   266
            }
slouken@1895
   267
        }
slouken@1895
   268
        break;
slouken@1895
   269
    }
slouken@1501
   270
slouken@1895
   271
    switch (cd->dst_fmt) {
slouken@1895
   272
    case ENCODING_UTF16:
slouken@1895
   273
        /* Default to host order, need to add byte order marker */
slouken@1895
   274
        if (dstlen < 2) {
slouken@1895
   275
            return SDL_ICONV_E2BIG;
slouken@1895
   276
        }
slouken@1895
   277
        *(Uint16 *) dst = UNICODE_BOM;
slouken@1895
   278
        dst += 2;
slouken@1895
   279
        dstlen -= 2;
slouken@1895
   280
        cd->dst_fmt = ENCODING_UTF16NATIVE;
slouken@1895
   281
        break;
slouken@1895
   282
    case ENCODING_UTF32:
slouken@1895
   283
        /* Default to host order, need to add byte order marker */
slouken@1895
   284
        if (dstlen < 4) {
slouken@1895
   285
            return SDL_ICONV_E2BIG;
slouken@1895
   286
        }
slouken@1895
   287
        *(Uint32 *) dst = UNICODE_BOM;
slouken@1895
   288
        dst += 4;
slouken@1895
   289
        dstlen -= 4;
slouken@1895
   290
        cd->dst_fmt = ENCODING_UTF32NATIVE;
slouken@1895
   291
        break;
slouken@1895
   292
    }
slouken@1501
   293
slouken@1895
   294
    total = 0;
slouken@1895
   295
    while (srclen > 0) {
slouken@1895
   296
        /* Decode a character */
slouken@1895
   297
        switch (cd->src_fmt) {
slouken@1895
   298
        case ENCODING_ASCII:
slouken@1895
   299
            {
slouken@1895
   300
                Uint8 *p = (Uint8 *) src;
slouken@1895
   301
                ch = (Uint32) (p[0] & 0x7F);
slouken@1895
   302
                ++src;
slouken@1895
   303
                --srclen;
slouken@1895
   304
            }
slouken@1895
   305
            break;
slouken@1895
   306
        case ENCODING_LATIN1:
slouken@1895
   307
            {
slouken@1895
   308
                Uint8 *p = (Uint8 *) src;
slouken@1895
   309
                ch = (Uint32) p[0];
slouken@1895
   310
                ++src;
slouken@1895
   311
                --srclen;
slouken@1895
   312
            }
slouken@1895
   313
            break;
slouken@1895
   314
        case ENCODING_UTF8:    /* RFC 3629 */
slouken@1895
   315
            {
slouken@1895
   316
                Uint8 *p = (Uint8 *) src;
slouken@1895
   317
                size_t left = 0;
slouken@1895
   318
                SDL_bool overlong = SDL_FALSE;
slouken@1895
   319
                if (p[0] >= 0xFC) {
slouken@1895
   320
                    if ((p[0] & 0xFE) != 0xFC) {
slouken@1895
   321
                        /* Skip illegal sequences
slouken@1895
   322
                           return SDL_ICONV_EILSEQ;
slouken@1895
   323
                         */
slouken@1895
   324
                        ch = UNKNOWN_UNICODE;
slouken@1895
   325
                    } else {
slouken@1895
   326
                        if (p[0] == 0xFC) {
slouken@1895
   327
                            overlong = SDL_TRUE;
slouken@1895
   328
                        }
slouken@1895
   329
                        ch = (Uint32) (p[0] & 0x01);
slouken@1895
   330
                        left = 5;
slouken@1895
   331
                    }
slouken@1895
   332
                } else if (p[0] >= 0xF8) {
slouken@1895
   333
                    if ((p[0] & 0xFC) != 0xF8) {
slouken@1895
   334
                        /* Skip illegal sequences
slouken@1895
   335
                           return SDL_ICONV_EILSEQ;
slouken@1895
   336
                         */
slouken@1895
   337
                        ch = UNKNOWN_UNICODE;
slouken@1895
   338
                    } else {
slouken@1895
   339
                        if (p[0] == 0xF8) {
slouken@1895
   340
                            overlong = SDL_TRUE;
slouken@1895
   341
                        }
slouken@1895
   342
                        ch = (Uint32) (p[0] & 0x03);
slouken@1895
   343
                        left = 4;
slouken@1895
   344
                    }
slouken@1895
   345
                } else if (p[0] >= 0xF0) {
slouken@1895
   346
                    if ((p[0] & 0xF8) != 0xF0) {
slouken@1895
   347
                        /* Skip illegal sequences
slouken@1895
   348
                           return SDL_ICONV_EILSEQ;
slouken@1895
   349
                         */
slouken@1895
   350
                        ch = UNKNOWN_UNICODE;
slouken@1895
   351
                    } else {
slouken@1895
   352
                        if (p[0] == 0xF0) {
slouken@1895
   353
                            overlong = SDL_TRUE;
slouken@1895
   354
                        }
slouken@1895
   355
                        ch = (Uint32) (p[0] & 0x07);
slouken@1895
   356
                        left = 3;
slouken@1895
   357
                    }
slouken@1895
   358
                } else if (p[0] >= 0xE0) {
slouken@1895
   359
                    if ((p[0] & 0xF0) != 0xE0) {
slouken@1895
   360
                        /* Skip illegal sequences
slouken@1895
   361
                           return SDL_ICONV_EILSEQ;
slouken@1895
   362
                         */
slouken@1895
   363
                        ch = UNKNOWN_UNICODE;
slouken@1895
   364
                    } else {
slouken@1895
   365
                        if (p[0] == 0xE0) {
slouken@1895
   366
                            overlong = SDL_TRUE;
slouken@1895
   367
                        }
slouken@1895
   368
                        ch = (Uint32) (p[0] & 0x0F);
slouken@1895
   369
                        left = 2;
slouken@1895
   370
                    }
slouken@1895
   371
                } else if (p[0] >= 0xC0) {
slouken@1895
   372
                    if ((p[0] & 0xE0) != 0xC0) {
slouken@1895
   373
                        /* Skip illegal sequences
slouken@1895
   374
                           return SDL_ICONV_EILSEQ;
slouken@1895
   375
                         */
slouken@1895
   376
                        ch = UNKNOWN_UNICODE;
slouken@1895
   377
                    } else {
slouken@1895
   378
                        if ((p[0] & 0xCE) == 0xC0) {
slouken@1895
   379
                            overlong = SDL_TRUE;
slouken@1895
   380
                        }
slouken@1895
   381
                        ch = (Uint32) (p[0] & 0x1F);
slouken@1895
   382
                        left = 1;
slouken@1895
   383
                    }
slouken@1895
   384
                } else {
slouken@1895
   385
                    if ((p[0] & 0x80) != 0x00) {
slouken@1895
   386
                        /* Skip illegal sequences
slouken@1895
   387
                           return SDL_ICONV_EILSEQ;
slouken@1895
   388
                         */
slouken@1895
   389
                        ch = UNKNOWN_UNICODE;
slouken@1895
   390
                    } else {
slouken@1895
   391
                        ch = (Uint32) p[0];
slouken@1895
   392
                    }
slouken@1895
   393
                }
slouken@1895
   394
                ++src;
slouken@1895
   395
                --srclen;
slouken@1895
   396
                if (srclen < left) {
slouken@1895
   397
                    return SDL_ICONV_EINVAL;
slouken@1895
   398
                }
slouken@1895
   399
                while (left--) {
slouken@1895
   400
                    ++p;
slouken@1895
   401
                    if ((p[0] & 0xC0) != 0x80) {
slouken@1895
   402
                        /* Skip illegal sequences
slouken@1895
   403
                           return SDL_ICONV_EILSEQ;
slouken@1895
   404
                         */
slouken@1895
   405
                        ch = UNKNOWN_UNICODE;
slouken@1895
   406
                        break;
slouken@1895
   407
                    }
slouken@1895
   408
                    ch <<= 6;
slouken@1895
   409
                    ch |= (p[0] & 0x3F);
slouken@1895
   410
                    ++src;
slouken@1895
   411
                    --srclen;
slouken@1895
   412
                }
slouken@1895
   413
                if (overlong) {
slouken@1895
   414
                    /* Potential security risk
slouken@1895
   415
                       return SDL_ICONV_EILSEQ;
slouken@1895
   416
                     */
slouken@1895
   417
                    ch = UNKNOWN_UNICODE;
slouken@1895
   418
                }
slouken@1895
   419
                if ((ch >= 0xD800 && ch <= 0xDFFF) ||
slouken@1895
   420
                    (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
slouken@1895
   421
                    /* Skip illegal sequences
slouken@1895
   422
                       return SDL_ICONV_EILSEQ;
slouken@1895
   423
                     */
slouken@1895
   424
                    ch = UNKNOWN_UNICODE;
slouken@1895
   425
                }
slouken@1895
   426
            }
slouken@1895
   427
            break;
slouken@1895
   428
        case ENCODING_UTF16BE: /* RFC 2781 */
slouken@1895
   429
            {
slouken@1895
   430
                Uint8 *p = (Uint8 *) src;
slouken@1895
   431
                Uint16 W1, W2;
slouken@1895
   432
                if (srclen < 2) {
slouken@1895
   433
                    return SDL_ICONV_EINVAL;
slouken@1895
   434
                }
slouken@1895
   435
                W1 = ((Uint16) p[0] << 8) | (Uint16) p[1];
slouken@1895
   436
                src += 2;
slouken@1895
   437
                srclen -= 2;
slouken@1895
   438
                if (W1 < 0xD800 || W1 > 0xDFFF) {
slouken@1895
   439
                    ch = (Uint32) W1;
slouken@1895
   440
                    break;
slouken@1895
   441
                }
slouken@1895
   442
                if (W1 > 0xDBFF) {
slouken@1895
   443
                    /* Skip illegal sequences
slouken@1895
   444
                       return SDL_ICONV_EILSEQ;
slouken@1895
   445
                     */
slouken@1895
   446
                    ch = UNKNOWN_UNICODE;
slouken@1895
   447
                    break;
slouken@1895
   448
                }
slouken@1895
   449
                if (srclen < 2) {
slouken@1895
   450
                    return SDL_ICONV_EINVAL;
slouken@1895
   451
                }
slouken@1895
   452
                p = (Uint8 *) src;
slouken@1895
   453
                W2 = ((Uint16) p[0] << 8) | (Uint16) p[1];
slouken@1895
   454
                src += 2;
slouken@1895
   455
                srclen -= 2;
slouken@1895
   456
                if (W2 < 0xDC00 || W2 > 0xDFFF) {
slouken@1895
   457
                    /* Skip illegal sequences
slouken@1895
   458
                       return SDL_ICONV_EILSEQ;
slouken@1895
   459
                     */
slouken@1895
   460
                    ch = UNKNOWN_UNICODE;
slouken@1895
   461
                    break;
slouken@1895
   462
                }
slouken@1895
   463
                ch = (((Uint32) (W1 & 0x3FF) << 10) |
slouken@1895
   464
                      (Uint32) (W2 & 0x3FF)) + 0x10000;
slouken@1895
   465
            }
slouken@1895
   466
            break;
slouken@1895
   467
        case ENCODING_UTF16LE: /* RFC 2781 */
slouken@1895
   468
            {
slouken@1895
   469
                Uint8 *p = (Uint8 *) src;
slouken@1895
   470
                Uint16 W1, W2;
slouken@1895
   471
                if (srclen < 2) {
slouken@1895
   472
                    return SDL_ICONV_EINVAL;
slouken@1895
   473
                }
slouken@1895
   474
                W1 = ((Uint16) p[1] << 8) | (Uint16) p[0];
slouken@1895
   475
                src += 2;
slouken@1895
   476
                srclen -= 2;
slouken@1895
   477
                if (W1 < 0xD800 || W1 > 0xDFFF) {
slouken@1895
   478
                    ch = (Uint32) W1;
slouken@1895
   479
                    break;
slouken@1895
   480
                }
slouken@1895
   481
                if (W1 > 0xDBFF) {
slouken@1895
   482
                    /* Skip illegal sequences
slouken@1895
   483
                       return SDL_ICONV_EILSEQ;
slouken@1895
   484
                     */
slouken@1895
   485
                    ch = UNKNOWN_UNICODE;
slouken@1895
   486
                    break;
slouken@1895
   487
                }
slouken@1895
   488
                if (srclen < 2) {
slouken@1895
   489
                    return SDL_ICONV_EINVAL;
slouken@1895
   490
                }
slouken@1895
   491
                p = (Uint8 *) src;
slouken@1895
   492
                W2 = ((Uint16) p[1] << 8) | (Uint16) p[0];
slouken@1895
   493
                src += 2;
slouken@1895
   494
                srclen -= 2;
slouken@1895
   495
                if (W2 < 0xDC00 || W2 > 0xDFFF) {
slouken@1895
   496
                    /* Skip illegal sequences
slouken@1895
   497
                       return SDL_ICONV_EILSEQ;
slouken@1895
   498
                     */
slouken@1895
   499
                    ch = UNKNOWN_UNICODE;
slouken@1895
   500
                    break;
slouken@1895
   501
                }
slouken@1895
   502
                ch = (((Uint32) (W1 & 0x3FF) << 10) |
slouken@1895
   503
                      (Uint32) (W2 & 0x3FF)) + 0x10000;
slouken@1895
   504
            }
slouken@1895
   505
            break;
slouken@1895
   506
        case ENCODING_UTF32BE:
slouken@1895
   507
            {
slouken@1895
   508
                Uint8 *p = (Uint8 *) src;
slouken@1895
   509
                if (srclen < 4) {
slouken@1895
   510
                    return SDL_ICONV_EINVAL;
slouken@1895
   511
                }
slouken@1895
   512
                ch = ((Uint32) p[0] << 24) |
slouken@1895
   513
                    ((Uint32) p[1] << 16) |
slouken@1895
   514
                    ((Uint32) p[2] << 8) | (Uint32) p[3];
slouken@1895
   515
                src += 4;
slouken@1895
   516
                srclen -= 4;
slouken@1895
   517
            }
slouken@1895
   518
            break;
slouken@1895
   519
        case ENCODING_UTF32LE:
slouken@1895
   520
            {
slouken@1895
   521
                Uint8 *p = (Uint8 *) src;
slouken@1895
   522
                if (srclen < 4) {
slouken@1895
   523
                    return SDL_ICONV_EINVAL;
slouken@1895
   524
                }
slouken@1895
   525
                ch = ((Uint32) p[3] << 24) |
slouken@1895
   526
                    ((Uint32) p[2] << 16) |
slouken@1895
   527
                    ((Uint32) p[1] << 8) | (Uint32) p[0];
slouken@1895
   528
                src += 4;
slouken@1895
   529
                srclen -= 4;
slouken@1895
   530
            }
slouken@1895
   531
            break;
slouken@1895
   532
        case ENCODING_UCS2:
slouken@1895
   533
            {
slouken@1895
   534
                Uint16 *p = (Uint16 *) src;
slouken@1895
   535
                if (srclen < 2) {
slouken@1895
   536
                    return SDL_ICONV_EINVAL;
slouken@1895
   537
                }
slouken@1895
   538
                ch = *p;
slouken@1895
   539
                src += 2;
slouken@1895
   540
                srclen -= 2;
slouken@1895
   541
            }
slouken@1895
   542
            break;
slouken@1895
   543
        case ENCODING_UCS4:
slouken@1895
   544
            {
slouken@1895
   545
                Uint32 *p = (Uint32 *) src;
slouken@1895
   546
                if (srclen < 4) {
slouken@1895
   547
                    return SDL_ICONV_EINVAL;
slouken@1895
   548
                }
slouken@1895
   549
                ch = *p;
slouken@1895
   550
                src += 4;
slouken@1895
   551
                srclen -= 4;
slouken@1895
   552
            }
slouken@1895
   553
            break;
slouken@1895
   554
        }
slouken@1501
   555
slouken@1895
   556
        /* Encode a character */
slouken@1895
   557
        switch (cd->dst_fmt) {
slouken@1895
   558
        case ENCODING_ASCII:
slouken@1895
   559
            {
slouken@1895
   560
                Uint8 *p = (Uint8 *) dst;
slouken@1895
   561
                if (dstlen < 1) {
slouken@1895
   562
                    return SDL_ICONV_E2BIG;
slouken@1895
   563
                }
slouken@1895
   564
                if (ch > 0x7F) {
slouken@1895
   565
                    *p = UNKNOWN_ASCII;
slouken@1895
   566
                } else {
slouken@1895
   567
                    *p = (Uint8) ch;
slouken@1895
   568
                }
slouken@1895
   569
                ++dst;
slouken@1895
   570
                --dstlen;
slouken@1895
   571
            }
slouken@1895
   572
            break;
slouken@1895
   573
        case ENCODING_LATIN1:
slouken@1895
   574
            {
slouken@1895
   575
                Uint8 *p = (Uint8 *) dst;
slouken@1895
   576
                if (dstlen < 1) {
slouken@1895
   577
                    return SDL_ICONV_E2BIG;
slouken@1895
   578
                }
slouken@1895
   579
                if (ch > 0xFF) {
slouken@1895
   580
                    *p = UNKNOWN_ASCII;
slouken@1895
   581
                } else {
slouken@1895
   582
                    *p = (Uint8) ch;
slouken@1895
   583
                }
slouken@1895
   584
                ++dst;
slouken@1895
   585
                --dstlen;
slouken@1895
   586
            }
slouken@1895
   587
            break;
slouken@1895
   588
        case ENCODING_UTF8:    /* RFC 3629 */
slouken@1895
   589
            {
slouken@1895
   590
                Uint8 *p = (Uint8 *) dst;
slouken@1895
   591
                if (ch > 0x10FFFF) {
slouken@1895
   592
                    ch = UNKNOWN_UNICODE;
slouken@1895
   593
                }
slouken@1895
   594
                if (ch <= 0x7F) {
slouken@1895
   595
                    if (dstlen < 1) {
slouken@1895
   596
                        return SDL_ICONV_E2BIG;
slouken@1895
   597
                    }
slouken@1895
   598
                    *p = (Uint8) ch;
slouken@1895
   599
                    ++dst;
slouken@1895
   600
                    --dstlen;
slouken@1895
   601
                } else if (ch <= 0x7FF) {
slouken@1895
   602
                    if (dstlen < 2) {
slouken@1895
   603
                        return SDL_ICONV_E2BIG;
slouken@1895
   604
                    }
slouken@1895
   605
                    p[0] = 0xC0 | (Uint8) ((ch >> 6) & 0x1F);
slouken@1895
   606
                    p[1] = 0x80 | (Uint8) (ch & 0x3F);
slouken@1895
   607
                    dst += 2;
slouken@1895
   608
                    dstlen -= 2;
slouken@1895
   609
                } else if (ch <= 0xFFFF) {
slouken@1895
   610
                    if (dstlen < 3) {
slouken@1895
   611
                        return SDL_ICONV_E2BIG;
slouken@1895
   612
                    }
slouken@1895
   613
                    p[0] = 0xE0 | (Uint8) ((ch >> 12) & 0x0F);
slouken@1895
   614
                    p[1] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
slouken@1895
   615
                    p[2] = 0x80 | (Uint8) (ch & 0x3F);
slouken@1895
   616
                    dst += 3;
slouken@1895
   617
                    dstlen -= 3;
slouken@1895
   618
                } else if (ch <= 0x1FFFFF) {
slouken@1895
   619
                    if (dstlen < 4) {
slouken@1895
   620
                        return SDL_ICONV_E2BIG;
slouken@1895
   621
                    }
slouken@1895
   622
                    p[0] = 0xF0 | (Uint8) ((ch >> 18) & 0x07);
slouken@1895
   623
                    p[1] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
slouken@1895
   624
                    p[2] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
slouken@1895
   625
                    p[3] = 0x80 | (Uint8) (ch & 0x3F);
slouken@1895
   626
                    dst += 4;
slouken@1895
   627
                    dstlen -= 4;
slouken@1895
   628
                } else if (ch <= 0x3FFFFFF) {
slouken@1895
   629
                    if (dstlen < 5) {
slouken@1895
   630
                        return SDL_ICONV_E2BIG;
slouken@1895
   631
                    }
slouken@1895
   632
                    p[0] = 0xF8 | (Uint8) ((ch >> 24) & 0x03);
slouken@1895
   633
                    p[1] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
slouken@1895
   634
                    p[2] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
slouken@1895
   635
                    p[3] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
slouken@1895
   636
                    p[4] = 0x80 | (Uint8) (ch & 0x3F);
slouken@1895
   637
                    dst += 5;
slouken@1895
   638
                    dstlen -= 5;
slouken@1895
   639
                } else {
slouken@1895
   640
                    if (dstlen < 6) {
slouken@1895
   641
                        return SDL_ICONV_E2BIG;
slouken@1895
   642
                    }
slouken@1895
   643
                    p[0] = 0xFC | (Uint8) ((ch >> 30) & 0x01);
slouken@1895
   644
                    p[1] = 0x80 | (Uint8) ((ch >> 24) & 0x3F);
slouken@1895
   645
                    p[2] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
slouken@1895
   646
                    p[3] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
slouken@1895
   647
                    p[4] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
slouken@1895
   648
                    p[5] = 0x80 | (Uint8) (ch & 0x3F);
slouken@1895
   649
                    dst += 6;
slouken@1895
   650
                    dstlen -= 6;
slouken@1895
   651
                }
slouken@1895
   652
            }
slouken@1895
   653
            break;
slouken@1895
   654
        case ENCODING_UTF16BE: /* RFC 2781 */
slouken@1895
   655
            {
slouken@1895
   656
                Uint8 *p = (Uint8 *) dst;
slouken@1895
   657
                if (ch > 0x10FFFF) {
slouken@1895
   658
                    ch = UNKNOWN_UNICODE;
slouken@1895
   659
                }
slouken@1895
   660
                if (ch < 0x10000) {
slouken@1895
   661
                    if (dstlen < 2) {
slouken@1895
   662
                        return SDL_ICONV_E2BIG;
slouken@1895
   663
                    }
slouken@1895
   664
                    p[0] = (Uint8) (ch >> 8);
slouken@1895
   665
                    p[1] = (Uint8) ch;
slouken@1895
   666
                    dst += 2;
slouken@1895
   667
                    dstlen -= 2;
slouken@1895
   668
                } else {
slouken@1895
   669
                    Uint16 W1, W2;
slouken@1895
   670
                    if (dstlen < 4) {
slouken@1895
   671
                        return SDL_ICONV_E2BIG;
slouken@1895
   672
                    }
slouken@1895
   673
                    ch = ch - 0x10000;
slouken@1895
   674
                    W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
slouken@1895
   675
                    W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
slouken@1895
   676
                    p[0] = (Uint8) (W1 >> 8);
slouken@1895
   677
                    p[1] = (Uint8) W1;
slouken@1895
   678
                    p[2] = (Uint8) (W2 >> 8);
slouken@1895
   679
                    p[3] = (Uint8) W2;
slouken@1895
   680
                    dst += 4;
slouken@1895
   681
                    dstlen -= 4;
slouken@1895
   682
                }
slouken@1895
   683
            }
slouken@1895
   684
            break;
slouken@1895
   685
        case ENCODING_UTF16LE: /* RFC 2781 */
slouken@1895
   686
            {
slouken@1895
   687
                Uint8 *p = (Uint8 *) dst;
slouken@1895
   688
                if (ch > 0x10FFFF) {
slouken@1895
   689
                    ch = UNKNOWN_UNICODE;
slouken@1895
   690
                }
slouken@1895
   691
                if (ch < 0x10000) {
slouken@1895
   692
                    if (dstlen < 2) {
slouken@1895
   693
                        return SDL_ICONV_E2BIG;
slouken@1895
   694
                    }
slouken@1895
   695
                    p[1] = (Uint8) (ch >> 8);
slouken@1895
   696
                    p[0] = (Uint8) ch;
slouken@1895
   697
                    dst += 2;
slouken@1895
   698
                    dstlen -= 2;
slouken@1895
   699
                } else {
slouken@1895
   700
                    Uint16 W1, W2;
slouken@1895
   701
                    if (dstlen < 4) {
slouken@1895
   702
                        return SDL_ICONV_E2BIG;
slouken@1895
   703
                    }
slouken@1895
   704
                    ch = ch - 0x10000;
slouken@1895
   705
                    W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
slouken@1895
   706
                    W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
slouken@1895
   707
                    p[1] = (Uint8) (W1 >> 8);
slouken@1895
   708
                    p[0] = (Uint8) W1;
slouken@1895
   709
                    p[3] = (Uint8) (W2 >> 8);
slouken@1895
   710
                    p[2] = (Uint8) W2;
slouken@1895
   711
                    dst += 4;
slouken@1895
   712
                    dstlen -= 4;
slouken@1895
   713
                }
slouken@1895
   714
            }
slouken@1895
   715
            break;
slouken@1895
   716
        case ENCODING_UTF32BE:
slouken@1895
   717
            {
slouken@1895
   718
                Uint8 *p = (Uint8 *) dst;
slouken@1895
   719
                if (ch > 0x10FFFF) {
slouken@1895
   720
                    ch = UNKNOWN_UNICODE;
slouken@1895
   721
                }
slouken@1895
   722
                if (dstlen < 4) {
slouken@1895
   723
                    return SDL_ICONV_E2BIG;
slouken@1895
   724
                }
slouken@1895
   725
                p[0] = (Uint8) (ch >> 24);
slouken@1895
   726
                p[1] = (Uint8) (ch >> 16);
slouken@1895
   727
                p[2] = (Uint8) (ch >> 8);
slouken@1895
   728
                p[3] = (Uint8) ch;
slouken@1895
   729
                dst += 4;
slouken@1895
   730
                dstlen -= 4;
slouken@1895
   731
            }
slouken@1895
   732
            break;
slouken@1895
   733
        case ENCODING_UTF32LE:
slouken@1895
   734
            {
slouken@1895
   735
                Uint8 *p = (Uint8 *) dst;
slouken@1895
   736
                if (ch > 0x10FFFF) {
slouken@1895
   737
                    ch = UNKNOWN_UNICODE;
slouken@1895
   738
                }
slouken@1895
   739
                if (dstlen < 4) {
slouken@1895
   740
                    return SDL_ICONV_E2BIG;
slouken@1895
   741
                }
slouken@1895
   742
                p[3] = (Uint8) (ch >> 24);
slouken@1895
   743
                p[2] = (Uint8) (ch >> 16);
slouken@1895
   744
                p[1] = (Uint8) (ch >> 8);
slouken@1895
   745
                p[0] = (Uint8) ch;
slouken@1895
   746
                dst += 4;
slouken@1895
   747
                dstlen -= 4;
slouken@1895
   748
            }
slouken@1895
   749
            break;
slouken@1895
   750
        case ENCODING_UCS2:
slouken@1895
   751
            {
slouken@1895
   752
                Uint16 *p = (Uint16 *) dst;
slouken@1895
   753
                if (ch > 0xFFFF) {
slouken@1895
   754
                    ch = UNKNOWN_UNICODE;
slouken@1895
   755
                }
slouken@1895
   756
                if (dstlen < 2) {
slouken@1895
   757
                    return SDL_ICONV_E2BIG;
slouken@1895
   758
                }
slouken@1895
   759
                *p = (Uint16) ch;
slouken@1895
   760
                dst += 2;
slouken@1895
   761
                dstlen -= 2;
slouken@1895
   762
            }
slouken@1895
   763
            break;
slouken@1895
   764
        case ENCODING_UCS4:
slouken@1895
   765
            {
slouken@1895
   766
                Uint32 *p = (Uint32 *) dst;
slouken@1895
   767
                if (ch > 0x7FFFFFFF) {
slouken@1895
   768
                    ch = UNKNOWN_UNICODE;
slouken@1895
   769
                }
slouken@1895
   770
                if (dstlen < 4) {
slouken@1895
   771
                    return SDL_ICONV_E2BIG;
slouken@1895
   772
                }
slouken@1895
   773
                *p = ch;
slouken@1895
   774
                dst += 4;
slouken@1895
   775
                dstlen -= 4;
slouken@1895
   776
            }
slouken@1895
   777
            break;
slouken@1895
   778
        }
slouken@1501
   779
slouken@1895
   780
        /* Update state */
slouken@1895
   781
        *inbuf = src;
slouken@1895
   782
        *inbytesleft = srclen;
slouken@1895
   783
        *outbuf = dst;
slouken@1895
   784
        *outbytesleft = dstlen;
slouken@1895
   785
        ++total;
slouken@1895
   786
    }
slouken@1895
   787
    return total;
slouken@1501
   788
}
slouken@1501
   789
slouken@1895
   790
int
slouken@1895
   791
SDL_iconv_close(SDL_iconv_t cd)
slouken@1501
   792
{
slouken@1895
   793
    if (cd && cd != (SDL_iconv_t) - 1) {
slouken@1895
   794
        SDL_free(cd);
slouken@1895
   795
    }
slouken@1895
   796
    return 0;
slouken@1501
   797
}
slouken@1501
   798
slouken@1501
   799
#endif /* !HAVE_ICONV */
slouken@1501
   800
slouken@1895
   801
char *
slouken@2135
   802
SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf,
slouken@1895
   803
                 size_t inbytesleft)
slouken@1501
   804
{
slouken@1895
   805
    SDL_iconv_t cd;
slouken@1895
   806
    char *string;
slouken@1895
   807
    size_t stringsize;
slouken@1895
   808
    char *outbuf;
slouken@1895
   809
    size_t outbytesleft;
slouken@1895
   810
    size_t retCode = 0;
slouken@1501
   811
slouken@2182
   812
    cd = SDL_iconv_open(tocode, fromcode);
slouken@2182
   813
    if (cd == (SDL_iconv_t) - 1) {
slouken@2182
   814
        /* See if we can recover here (fixes iconv on Solaris 11) */
slouken@2182
   815
        if (!tocode || !*tocode) {
slouken@2182
   816
            tocode = "UTF-8";
slouken@2182
   817
        }
slouken@2182
   818
        if (!fromcode || !*fromcode) {
slouken@2182
   819
            tocode = "UTF-8";
slouken@2182
   820
        }
slouken@2182
   821
        cd = SDL_iconv_open(tocode, fromcode);
slouken@2143
   822
    }
slouken@1895
   823
    if (cd == (SDL_iconv_t) - 1) {
slouken@1895
   824
        return NULL;
slouken@1895
   825
    }
slouken@1501
   826
slouken@1895
   827
    stringsize = inbytesleft > 4 ? inbytesleft : 4;
slouken@1895
   828
    string = SDL_malloc(stringsize);
slouken@1895
   829
    if (!string) {
slouken@1895
   830
        SDL_iconv_close(cd);
slouken@1895
   831
        return NULL;
slouken@1895
   832
    }
slouken@1895
   833
    outbuf = string;
slouken@1895
   834
    outbytesleft = stringsize;
slouken@1895
   835
    SDL_memset(outbuf, 0, 4);
slouken@1501
   836
slouken@1895
   837
    while (inbytesleft > 0) {
slouken@1895
   838
        retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
slouken@1895
   839
        switch (retCode) {
slouken@1895
   840
        case SDL_ICONV_E2BIG:
slouken@1895
   841
            {
slouken@1895
   842
                char *oldstring = string;
slouken@1895
   843
                stringsize *= 2;
slouken@1895
   844
                string = SDL_realloc(string, stringsize);
slouken@1895
   845
                if (!string) {
slouken@1895
   846
                    SDL_iconv_close(cd);
slouken@1895
   847
                    return NULL;
slouken@1895
   848
                }
slouken@1895
   849
                outbuf = string + (outbuf - oldstring);
slouken@1895
   850
                outbytesleft = stringsize - (outbuf - string);
slouken@1895
   851
                SDL_memset(outbuf, 0, 4);
slouken@1895
   852
            }
slouken@1895
   853
            break;
slouken@1895
   854
        case SDL_ICONV_EILSEQ:
slouken@1895
   855
            /* Try skipping some input data - not perfect, but... */
slouken@1895
   856
            ++inbuf;
slouken@1895
   857
            --inbytesleft;
slouken@1895
   858
            break;
slouken@1895
   859
        case SDL_ICONV_EINVAL:
slouken@1895
   860
        case SDL_ICONV_ERROR:
slouken@1895
   861
            /* We can't continue... */
slouken@1895
   862
            inbytesleft = 0;
slouken@1895
   863
            break;
slouken@1895
   864
        }
slouken@1895
   865
    }
slouken@1895
   866
    SDL_iconv_close(cd);
slouken@1501
   867
slouken@1895
   868
    return string;
slouken@1501
   869
}
slouken@1895
   870
slouken@1895
   871
/* vi: set ts=4 sw=4 expandtab: */