src/stdlib/SDL_iconv.c
author Ryan C. Gordon
Mon, 22 Aug 2011 14:25:11 -0400
changeset 5633 21a6e87905e3
parent 5535 96594ac5fd1a
child 6138 4c64952a58fb
permissions -rw-r--r--
Apple's C runtime has the non-const iconv(), too.
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2011 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "SDL_config.h"
    22 
    23 /* This file contains portable iconv functions for SDL */
    24 
    25 #include "SDL_stdinc.h"
    26 #include "SDL_endian.h"
    27 
    28 #ifdef HAVE_ICONV
    29 
    30 /* Depending on which standard the iconv() was implemented with,
    31    iconv() may or may not use const char ** for the inbuf param.
    32    If we get this wrong, it's just a warning, so no big deal.
    33 */
    34 #if defined(_XGP6) || defined(__APPLE__) || \
    35     (defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2)))
    36 #define ICONV_INBUF_NONCONST
    37 #endif
    38 
    39 #include <errno.h>
    40 
    41 size_t
    42 SDL_iconv(SDL_iconv_t cd,
    43           const char **inbuf, size_t * inbytesleft,
    44           char **outbuf, size_t * outbytesleft)
    45 {
    46     size_t retCode;
    47 #ifdef ICONV_INBUF_NONCONST
    48     retCode = iconv(cd, (char **) inbuf, inbytesleft, outbuf, outbytesleft);
    49 #else
    50     retCode = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
    51 #endif
    52     if (retCode == (size_t) - 1) {
    53         switch (errno) {
    54         case E2BIG:
    55             return SDL_ICONV_E2BIG;
    56         case EILSEQ:
    57             return SDL_ICONV_EILSEQ;
    58         case EINVAL:
    59             return SDL_ICONV_EINVAL;
    60         default:
    61             return SDL_ICONV_ERROR;
    62         }
    63     }
    64     return retCode;
    65 }
    66 
    67 #else
    68 
    69 /* Lots of useful information on Unicode at:
    70 	http://www.cl.cam.ac.uk/~mgk25/unicode.html
    71 */
    72 
    73 #define UNICODE_BOM	0xFEFF
    74 
    75 #define UNKNOWN_ASCII	'?'
    76 #define UNKNOWN_UNICODE	0xFFFD
    77 
    78 enum
    79 {
    80     ENCODING_UNKNOWN,
    81     ENCODING_ASCII,
    82     ENCODING_LATIN1,
    83     ENCODING_UTF8,
    84     ENCODING_UTF16,             /* Needs byte order marker */
    85     ENCODING_UTF16BE,
    86     ENCODING_UTF16LE,
    87     ENCODING_UTF32,             /* Needs byte order marker */
    88     ENCODING_UTF32BE,
    89     ENCODING_UTF32LE,
    90     ENCODING_UCS2,              /* Native byte order assumed */
    91     ENCODING_UCS4,              /* Native byte order assumed */
    92 };
    93 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
    94 #define ENCODING_UTF16NATIVE	ENCODING_UTF16BE
    95 #define ENCODING_UTF32NATIVE	ENCODING_UTF32BE
    96 #else
    97 #define ENCODING_UTF16NATIVE	ENCODING_UTF16LE
    98 #define ENCODING_UTF32NATIVE	ENCODING_UTF32LE
    99 #endif
   100 
   101 struct _SDL_iconv_t
   102 {
   103     int src_fmt;
   104     int dst_fmt;
   105 };
   106 
   107 static struct
   108 {
   109     const char *name;
   110     int format;
   111 } encodings[] = {
   112 /* *INDENT-OFF* */
   113     { "ASCII", ENCODING_ASCII },
   114     { "US-ASCII", ENCODING_ASCII },
   115     { "8859-1", ENCODING_LATIN1 },
   116     { "ISO-8859-1", ENCODING_LATIN1 },
   117     { "UTF8", ENCODING_UTF8 },
   118     { "UTF-8", ENCODING_UTF8 },
   119     { "UTF16", ENCODING_UTF16 },
   120     { "UTF-16", ENCODING_UTF16 },
   121     { "UTF16BE", ENCODING_UTF16BE },
   122     { "UTF-16BE", ENCODING_UTF16BE },
   123     { "UTF16LE", ENCODING_UTF16LE },
   124     { "UTF-16LE", ENCODING_UTF16LE },
   125     { "UTF32", ENCODING_UTF32 },
   126     { "UTF-32", ENCODING_UTF32 },
   127     { "UTF32BE", ENCODING_UTF32BE },
   128     { "UTF-32BE", ENCODING_UTF32BE },
   129     { "UTF32LE", ENCODING_UTF32LE },
   130     { "UTF-32LE", ENCODING_UTF32LE },
   131     { "UCS2", ENCODING_UCS2 },
   132     { "UCS-2", ENCODING_UCS2 },
   133     { "UCS4", ENCODING_UCS4 },
   134     { "UCS-4", ENCODING_UCS4 },
   135 /* *INDENT-ON* */
   136 };
   137 
   138 static const char *
   139 getlocale(char *buffer, size_t bufsize)
   140 {
   141     const char *lang;
   142     char *ptr;
   143 
   144     lang = SDL_getenv("LC_ALL");
   145     if (!lang) {
   146         lang = SDL_getenv("LC_CTYPE");
   147     }
   148     if (!lang) {
   149         lang = SDL_getenv("LC_MESSAGES");
   150     }
   151     if (!lang) {
   152         lang = SDL_getenv("LANG");
   153     }
   154     if (!lang || !*lang || SDL_strcmp(lang, "C") == 0) {
   155         lang = "ASCII";
   156     }
   157 
   158     /* We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8" */
   159     ptr = SDL_strchr(lang, '.');
   160     if (ptr != NULL) {
   161         lang = ptr + 1;
   162     }
   163 
   164     SDL_strlcpy(buffer, lang, bufsize);
   165     ptr = SDL_strchr(buffer, '@');
   166     if (ptr != NULL) {
   167         *ptr = '\0';            /* chop end of string. */
   168     }
   169 
   170     return buffer;
   171 }
   172 
   173 SDL_iconv_t
   174 SDL_iconv_open(const char *tocode, const char *fromcode)
   175 {
   176     int src_fmt = ENCODING_UNKNOWN;
   177     int dst_fmt = ENCODING_UNKNOWN;
   178     int i;
   179     char fromcode_buffer[64];
   180     char tocode_buffer[64];
   181 
   182     if (!fromcode || !*fromcode) {
   183         fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
   184     }
   185     if (!tocode || !*tocode) {
   186         tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
   187     }
   188     for (i = 0; i < SDL_arraysize(encodings); ++i) {
   189         if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) {
   190             src_fmt = encodings[i].format;
   191             if (dst_fmt != ENCODING_UNKNOWN) {
   192                 break;
   193             }
   194         }
   195         if (SDL_strcasecmp(tocode, encodings[i].name) == 0) {
   196             dst_fmt = encodings[i].format;
   197             if (src_fmt != ENCODING_UNKNOWN) {
   198                 break;
   199             }
   200         }
   201     }
   202     if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) {
   203         SDL_iconv_t cd = (SDL_iconv_t) SDL_malloc(sizeof(*cd));
   204         if (cd) {
   205             cd->src_fmt = src_fmt;
   206             cd->dst_fmt = dst_fmt;
   207             return cd;
   208         }
   209     }
   210     return (SDL_iconv_t) - 1;
   211 }
   212 
   213 size_t
   214 SDL_iconv(SDL_iconv_t cd,
   215           const char **inbuf, size_t * inbytesleft,
   216           char **outbuf, size_t * outbytesleft)
   217 {
   218     /* For simplicity, we'll convert everything to and from UCS-4 */
   219     const char *src;
   220     char *dst;
   221     size_t srclen, dstlen;
   222     Uint32 ch = 0;
   223     size_t total;
   224 
   225     if (!inbuf || !*inbuf) {
   226         /* Reset the context */
   227         return 0;
   228     }
   229     if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
   230         return SDL_ICONV_E2BIG;
   231     }
   232     src = *inbuf;
   233     srclen = (inbytesleft ? *inbytesleft : 0);
   234     dst = *outbuf;
   235     dstlen = *outbytesleft;
   236 
   237     switch (cd->src_fmt) {
   238     case ENCODING_UTF16:
   239         /* Scan for a byte order marker */
   240         {
   241             Uint8 *p = (Uint8 *) src;
   242             size_t n = srclen / 2;
   243             while (n) {
   244                 if (p[0] == 0xFF && p[1] == 0xFE) {
   245                     cd->src_fmt = ENCODING_UTF16BE;
   246                     break;
   247                 } else if (p[0] == 0xFE && p[1] == 0xFF) {
   248                     cd->src_fmt = ENCODING_UTF16LE;
   249                     break;
   250                 }
   251                 p += 2;
   252                 --n;
   253             }
   254             if (n == 0) {
   255                 /* We can't tell, default to host order */
   256                 cd->src_fmt = ENCODING_UTF16NATIVE;
   257             }
   258         }
   259         break;
   260     case ENCODING_UTF32:
   261         /* Scan for a byte order marker */
   262         {
   263             Uint8 *p = (Uint8 *) src;
   264             size_t n = srclen / 4;
   265             while (n) {
   266                 if (p[0] == 0xFF && p[1] == 0xFE &&
   267                     p[2] == 0x00 && p[3] == 0x00) {
   268                     cd->src_fmt = ENCODING_UTF32BE;
   269                     break;
   270                 } else if (p[0] == 0x00 && p[1] == 0x00 &&
   271                            p[2] == 0xFE && p[3] == 0xFF) {
   272                     cd->src_fmt = ENCODING_UTF32LE;
   273                     break;
   274                 }
   275                 p += 4;
   276                 --n;
   277             }
   278             if (n == 0) {
   279                 /* We can't tell, default to host order */
   280                 cd->src_fmt = ENCODING_UTF32NATIVE;
   281             }
   282         }
   283         break;
   284     }
   285 
   286     switch (cd->dst_fmt) {
   287     case ENCODING_UTF16:
   288         /* Default to host order, need to add byte order marker */
   289         if (dstlen < 2) {
   290             return SDL_ICONV_E2BIG;
   291         }
   292         *(Uint16 *) dst = UNICODE_BOM;
   293         dst += 2;
   294         dstlen -= 2;
   295         cd->dst_fmt = ENCODING_UTF16NATIVE;
   296         break;
   297     case ENCODING_UTF32:
   298         /* Default to host order, need to add byte order marker */
   299         if (dstlen < 4) {
   300             return SDL_ICONV_E2BIG;
   301         }
   302         *(Uint32 *) dst = UNICODE_BOM;
   303         dst += 4;
   304         dstlen -= 4;
   305         cd->dst_fmt = ENCODING_UTF32NATIVE;
   306         break;
   307     }
   308 
   309     total = 0;
   310     while (srclen > 0) {
   311         /* Decode a character */
   312         switch (cd->src_fmt) {
   313         case ENCODING_ASCII:
   314             {
   315                 Uint8 *p = (Uint8 *) src;
   316                 ch = (Uint32) (p[0] & 0x7F);
   317                 ++src;
   318                 --srclen;
   319             }
   320             break;
   321         case ENCODING_LATIN1:
   322             {
   323                 Uint8 *p = (Uint8 *) src;
   324                 ch = (Uint32) p[0];
   325                 ++src;
   326                 --srclen;
   327             }
   328             break;
   329         case ENCODING_UTF8:    /* RFC 3629 */
   330             {
   331                 Uint8 *p = (Uint8 *) src;
   332                 size_t left = 0;
   333                 SDL_bool overlong = SDL_FALSE;
   334                 if (p[0] >= 0xFC) {
   335                     if ((p[0] & 0xFE) != 0xFC) {
   336                         /* Skip illegal sequences
   337                            return SDL_ICONV_EILSEQ;
   338                          */
   339                         ch = UNKNOWN_UNICODE;
   340                     } else {
   341                         if (p[0] == 0xFC) {
   342                             overlong = SDL_TRUE;
   343                         }
   344                         ch = (Uint32) (p[0] & 0x01);
   345                         left = 5;
   346                     }
   347                 } else if (p[0] >= 0xF8) {
   348                     if ((p[0] & 0xFC) != 0xF8) {
   349                         /* Skip illegal sequences
   350                            return SDL_ICONV_EILSEQ;
   351                          */
   352                         ch = UNKNOWN_UNICODE;
   353                     } else {
   354                         if (p[0] == 0xF8) {
   355                             overlong = SDL_TRUE;
   356                         }
   357                         ch = (Uint32) (p[0] & 0x03);
   358                         left = 4;
   359                     }
   360                 } else if (p[0] >= 0xF0) {
   361                     if ((p[0] & 0xF8) != 0xF0) {
   362                         /* Skip illegal sequences
   363                            return SDL_ICONV_EILSEQ;
   364                          */
   365                         ch = UNKNOWN_UNICODE;
   366                     } else {
   367                         if (p[0] == 0xF0) {
   368                             overlong = SDL_TRUE;
   369                         }
   370                         ch = (Uint32) (p[0] & 0x07);
   371                         left = 3;
   372                     }
   373                 } else if (p[0] >= 0xE0) {
   374                     if ((p[0] & 0xF0) != 0xE0) {
   375                         /* Skip illegal sequences
   376                            return SDL_ICONV_EILSEQ;
   377                          */
   378                         ch = UNKNOWN_UNICODE;
   379                     } else {
   380                         if (p[0] == 0xE0) {
   381                             overlong = SDL_TRUE;
   382                         }
   383                         ch = (Uint32) (p[0] & 0x0F);
   384                         left = 2;
   385                     }
   386                 } else if (p[0] >= 0xC0) {
   387                     if ((p[0] & 0xE0) != 0xC0) {
   388                         /* Skip illegal sequences
   389                            return SDL_ICONV_EILSEQ;
   390                          */
   391                         ch = UNKNOWN_UNICODE;
   392                     } else {
   393                         if ((p[0] & 0xDE) == 0xC0) {
   394                             overlong = SDL_TRUE;
   395                         }
   396                         ch = (Uint32) (p[0] & 0x1F);
   397                         left = 1;
   398                     }
   399                 } else {
   400                     if ((p[0] & 0x80) != 0x00) {
   401                         /* Skip illegal sequences
   402                            return SDL_ICONV_EILSEQ;
   403                          */
   404                         ch = UNKNOWN_UNICODE;
   405                     } else {
   406                         ch = (Uint32) p[0];
   407                     }
   408                 }
   409                 ++src;
   410                 --srclen;
   411                 if (srclen < left) {
   412                     return SDL_ICONV_EINVAL;
   413                 }
   414                 while (left--) {
   415                     ++p;
   416                     if ((p[0] & 0xC0) != 0x80) {
   417                         /* Skip illegal sequences
   418                            return SDL_ICONV_EILSEQ;
   419                          */
   420                         ch = UNKNOWN_UNICODE;
   421                         break;
   422                     }
   423                     ch <<= 6;
   424                     ch |= (p[0] & 0x3F);
   425                     ++src;
   426                     --srclen;
   427                 }
   428                 if (overlong) {
   429                     /* Potential security risk
   430                        return SDL_ICONV_EILSEQ;
   431                      */
   432                     ch = UNKNOWN_UNICODE;
   433                 }
   434                 if ((ch >= 0xD800 && ch <= 0xDFFF) ||
   435                     (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
   436                     /* Skip illegal sequences
   437                        return SDL_ICONV_EILSEQ;
   438                      */
   439                     ch = UNKNOWN_UNICODE;
   440                 }
   441             }
   442             break;
   443         case ENCODING_UTF16BE: /* RFC 2781 */
   444             {
   445                 Uint8 *p = (Uint8 *) src;
   446                 Uint16 W1, W2;
   447                 if (srclen < 2) {
   448                     return SDL_ICONV_EINVAL;
   449                 }
   450                 W1 = ((Uint16) p[0] << 8) | (Uint16) p[1];
   451                 src += 2;
   452                 srclen -= 2;
   453                 if (W1 < 0xD800 || W1 > 0xDFFF) {
   454                     ch = (Uint32) W1;
   455                     break;
   456                 }
   457                 if (W1 > 0xDBFF) {
   458                     /* Skip illegal sequences
   459                        return SDL_ICONV_EILSEQ;
   460                      */
   461                     ch = UNKNOWN_UNICODE;
   462                     break;
   463                 }
   464                 if (srclen < 2) {
   465                     return SDL_ICONV_EINVAL;
   466                 }
   467                 p = (Uint8 *) src;
   468                 W2 = ((Uint16) p[0] << 8) | (Uint16) p[1];
   469                 src += 2;
   470                 srclen -= 2;
   471                 if (W2 < 0xDC00 || W2 > 0xDFFF) {
   472                     /* Skip illegal sequences
   473                        return SDL_ICONV_EILSEQ;
   474                      */
   475                     ch = UNKNOWN_UNICODE;
   476                     break;
   477                 }
   478                 ch = (((Uint32) (W1 & 0x3FF) << 10) |
   479                       (Uint32) (W2 & 0x3FF)) + 0x10000;
   480             }
   481             break;
   482         case ENCODING_UTF16LE: /* RFC 2781 */
   483             {
   484                 Uint8 *p = (Uint8 *) src;
   485                 Uint16 W1, W2;
   486                 if (srclen < 2) {
   487                     return SDL_ICONV_EINVAL;
   488                 }
   489                 W1 = ((Uint16) p[1] << 8) | (Uint16) p[0];
   490                 src += 2;
   491                 srclen -= 2;
   492                 if (W1 < 0xD800 || W1 > 0xDFFF) {
   493                     ch = (Uint32) W1;
   494                     break;
   495                 }
   496                 if (W1 > 0xDBFF) {
   497                     /* Skip illegal sequences
   498                        return SDL_ICONV_EILSEQ;
   499                      */
   500                     ch = UNKNOWN_UNICODE;
   501                     break;
   502                 }
   503                 if (srclen < 2) {
   504                     return SDL_ICONV_EINVAL;
   505                 }
   506                 p = (Uint8 *) src;
   507                 W2 = ((Uint16) p[1] << 8) | (Uint16) p[0];
   508                 src += 2;
   509                 srclen -= 2;
   510                 if (W2 < 0xDC00 || W2 > 0xDFFF) {
   511                     /* Skip illegal sequences
   512                        return SDL_ICONV_EILSEQ;
   513                      */
   514                     ch = UNKNOWN_UNICODE;
   515                     break;
   516                 }
   517                 ch = (((Uint32) (W1 & 0x3FF) << 10) |
   518                       (Uint32) (W2 & 0x3FF)) + 0x10000;
   519             }
   520             break;
   521         case ENCODING_UTF32BE:
   522             {
   523                 Uint8 *p = (Uint8 *) src;
   524                 if (srclen < 4) {
   525                     return SDL_ICONV_EINVAL;
   526                 }
   527                 ch = ((Uint32) p[0] << 24) |
   528                     ((Uint32) p[1] << 16) |
   529                     ((Uint32) p[2] << 8) | (Uint32) p[3];
   530                 src += 4;
   531                 srclen -= 4;
   532             }
   533             break;
   534         case ENCODING_UTF32LE:
   535             {
   536                 Uint8 *p = (Uint8 *) src;
   537                 if (srclen < 4) {
   538                     return SDL_ICONV_EINVAL;
   539                 }
   540                 ch = ((Uint32) p[3] << 24) |
   541                     ((Uint32) p[2] << 16) |
   542                     ((Uint32) p[1] << 8) | (Uint32) p[0];
   543                 src += 4;
   544                 srclen -= 4;
   545             }
   546             break;
   547         case ENCODING_UCS2:
   548             {
   549                 Uint16 *p = (Uint16 *) src;
   550                 if (srclen < 2) {
   551                     return SDL_ICONV_EINVAL;
   552                 }
   553                 ch = *p;
   554                 src += 2;
   555                 srclen -= 2;
   556             }
   557             break;
   558         case ENCODING_UCS4:
   559             {
   560                 Uint32 *p = (Uint32 *) src;
   561                 if (srclen < 4) {
   562                     return SDL_ICONV_EINVAL;
   563                 }
   564                 ch = *p;
   565                 src += 4;
   566                 srclen -= 4;
   567             }
   568             break;
   569         }
   570 
   571         /* Encode a character */
   572         switch (cd->dst_fmt) {
   573         case ENCODING_ASCII:
   574             {
   575                 Uint8 *p = (Uint8 *) dst;
   576                 if (dstlen < 1) {
   577                     return SDL_ICONV_E2BIG;
   578                 }
   579                 if (ch > 0x7F) {
   580                     *p = UNKNOWN_ASCII;
   581                 } else {
   582                     *p = (Uint8) ch;
   583                 }
   584                 ++dst;
   585                 --dstlen;
   586             }
   587             break;
   588         case ENCODING_LATIN1:
   589             {
   590                 Uint8 *p = (Uint8 *) dst;
   591                 if (dstlen < 1) {
   592                     return SDL_ICONV_E2BIG;
   593                 }
   594                 if (ch > 0xFF) {
   595                     *p = UNKNOWN_ASCII;
   596                 } else {
   597                     *p = (Uint8) ch;
   598                 }
   599                 ++dst;
   600                 --dstlen;
   601             }
   602             break;
   603         case ENCODING_UTF8:    /* RFC 3629 */
   604             {
   605                 Uint8 *p = (Uint8 *) dst;
   606                 if (ch > 0x10FFFF) {
   607                     ch = UNKNOWN_UNICODE;
   608                 }
   609                 if (ch <= 0x7F) {
   610                     if (dstlen < 1) {
   611                         return SDL_ICONV_E2BIG;
   612                     }
   613                     *p = (Uint8) ch;
   614                     ++dst;
   615                     --dstlen;
   616                 } else if (ch <= 0x7FF) {
   617                     if (dstlen < 2) {
   618                         return SDL_ICONV_E2BIG;
   619                     }
   620                     p[0] = 0xC0 | (Uint8) ((ch >> 6) & 0x1F);
   621                     p[1] = 0x80 | (Uint8) (ch & 0x3F);
   622                     dst += 2;
   623                     dstlen -= 2;
   624                 } else if (ch <= 0xFFFF) {
   625                     if (dstlen < 3) {
   626                         return SDL_ICONV_E2BIG;
   627                     }
   628                     p[0] = 0xE0 | (Uint8) ((ch >> 12) & 0x0F);
   629                     p[1] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   630                     p[2] = 0x80 | (Uint8) (ch & 0x3F);
   631                     dst += 3;
   632                     dstlen -= 3;
   633                 } else if (ch <= 0x1FFFFF) {
   634                     if (dstlen < 4) {
   635                         return SDL_ICONV_E2BIG;
   636                     }
   637                     p[0] = 0xF0 | (Uint8) ((ch >> 18) & 0x07);
   638                     p[1] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   639                     p[2] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   640                     p[3] = 0x80 | (Uint8) (ch & 0x3F);
   641                     dst += 4;
   642                     dstlen -= 4;
   643                 } else if (ch <= 0x3FFFFFF) {
   644                     if (dstlen < 5) {
   645                         return SDL_ICONV_E2BIG;
   646                     }
   647                     p[0] = 0xF8 | (Uint8) ((ch >> 24) & 0x03);
   648                     p[1] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
   649                     p[2] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   650                     p[3] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   651                     p[4] = 0x80 | (Uint8) (ch & 0x3F);
   652                     dst += 5;
   653                     dstlen -= 5;
   654                 } else {
   655                     if (dstlen < 6) {
   656                         return SDL_ICONV_E2BIG;
   657                     }
   658                     p[0] = 0xFC | (Uint8) ((ch >> 30) & 0x01);
   659                     p[1] = 0x80 | (Uint8) ((ch >> 24) & 0x3F);
   660                     p[2] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
   661                     p[3] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   662                     p[4] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   663                     p[5] = 0x80 | (Uint8) (ch & 0x3F);
   664                     dst += 6;
   665                     dstlen -= 6;
   666                 }
   667             }
   668             break;
   669         case ENCODING_UTF16BE: /* RFC 2781 */
   670             {
   671                 Uint8 *p = (Uint8 *) dst;
   672                 if (ch > 0x10FFFF) {
   673                     ch = UNKNOWN_UNICODE;
   674                 }
   675                 if (ch < 0x10000) {
   676                     if (dstlen < 2) {
   677                         return SDL_ICONV_E2BIG;
   678                     }
   679                     p[0] = (Uint8) (ch >> 8);
   680                     p[1] = (Uint8) ch;
   681                     dst += 2;
   682                     dstlen -= 2;
   683                 } else {
   684                     Uint16 W1, W2;
   685                     if (dstlen < 4) {
   686                         return SDL_ICONV_E2BIG;
   687                     }
   688                     ch = ch - 0x10000;
   689                     W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
   690                     W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
   691                     p[0] = (Uint8) (W1 >> 8);
   692                     p[1] = (Uint8) W1;
   693                     p[2] = (Uint8) (W2 >> 8);
   694                     p[3] = (Uint8) W2;
   695                     dst += 4;
   696                     dstlen -= 4;
   697                 }
   698             }
   699             break;
   700         case ENCODING_UTF16LE: /* RFC 2781 */
   701             {
   702                 Uint8 *p = (Uint8 *) dst;
   703                 if (ch > 0x10FFFF) {
   704                     ch = UNKNOWN_UNICODE;
   705                 }
   706                 if (ch < 0x10000) {
   707                     if (dstlen < 2) {
   708                         return SDL_ICONV_E2BIG;
   709                     }
   710                     p[1] = (Uint8) (ch >> 8);
   711                     p[0] = (Uint8) ch;
   712                     dst += 2;
   713                     dstlen -= 2;
   714                 } else {
   715                     Uint16 W1, W2;
   716                     if (dstlen < 4) {
   717                         return SDL_ICONV_E2BIG;
   718                     }
   719                     ch = ch - 0x10000;
   720                     W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
   721                     W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
   722                     p[1] = (Uint8) (W1 >> 8);
   723                     p[0] = (Uint8) W1;
   724                     p[3] = (Uint8) (W2 >> 8);
   725                     p[2] = (Uint8) W2;
   726                     dst += 4;
   727                     dstlen -= 4;
   728                 }
   729             }
   730             break;
   731         case ENCODING_UTF32BE:
   732             {
   733                 Uint8 *p = (Uint8 *) dst;
   734                 if (ch > 0x10FFFF) {
   735                     ch = UNKNOWN_UNICODE;
   736                 }
   737                 if (dstlen < 4) {
   738                     return SDL_ICONV_E2BIG;
   739                 }
   740                 p[0] = (Uint8) (ch >> 24);
   741                 p[1] = (Uint8) (ch >> 16);
   742                 p[2] = (Uint8) (ch >> 8);
   743                 p[3] = (Uint8) ch;
   744                 dst += 4;
   745                 dstlen -= 4;
   746             }
   747             break;
   748         case ENCODING_UTF32LE:
   749             {
   750                 Uint8 *p = (Uint8 *) dst;
   751                 if (ch > 0x10FFFF) {
   752                     ch = UNKNOWN_UNICODE;
   753                 }
   754                 if (dstlen < 4) {
   755                     return SDL_ICONV_E2BIG;
   756                 }
   757                 p[3] = (Uint8) (ch >> 24);
   758                 p[2] = (Uint8) (ch >> 16);
   759                 p[1] = (Uint8) (ch >> 8);
   760                 p[0] = (Uint8) ch;
   761                 dst += 4;
   762                 dstlen -= 4;
   763             }
   764             break;
   765         case ENCODING_UCS2:
   766             {
   767                 Uint16 *p = (Uint16 *) dst;
   768                 if (ch > 0xFFFF) {
   769                     ch = UNKNOWN_UNICODE;
   770                 }
   771                 if (dstlen < 2) {
   772                     return SDL_ICONV_E2BIG;
   773                 }
   774                 *p = (Uint16) ch;
   775                 dst += 2;
   776                 dstlen -= 2;
   777             }
   778             break;
   779         case ENCODING_UCS4:
   780             {
   781                 Uint32 *p = (Uint32 *) dst;
   782                 if (ch > 0x7FFFFFFF) {
   783                     ch = UNKNOWN_UNICODE;
   784                 }
   785                 if (dstlen < 4) {
   786                     return SDL_ICONV_E2BIG;
   787                 }
   788                 *p = ch;
   789                 dst += 4;
   790                 dstlen -= 4;
   791             }
   792             break;
   793         }
   794 
   795         /* Update state */
   796         *inbuf = src;
   797         *inbytesleft = srclen;
   798         *outbuf = dst;
   799         *outbytesleft = dstlen;
   800         ++total;
   801     }
   802     return total;
   803 }
   804 
   805 int
   806 SDL_iconv_close(SDL_iconv_t cd)
   807 {
   808     if (cd && cd != (SDL_iconv_t) - 1) {
   809         SDL_free(cd);
   810     }
   811     return 0;
   812 }
   813 
   814 #endif /* !HAVE_ICONV */
   815 
   816 char *
   817 SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf,
   818                  size_t inbytesleft)
   819 {
   820     SDL_iconv_t cd;
   821     char *string;
   822     size_t stringsize;
   823     char *outbuf;
   824     size_t outbytesleft;
   825     size_t retCode = 0;
   826 
   827     cd = SDL_iconv_open(tocode, fromcode);
   828     if (cd == (SDL_iconv_t) - 1) {
   829         /* See if we can recover here (fixes iconv on Solaris 11) */
   830         if (!tocode || !*tocode) {
   831             tocode = "UTF-8";
   832         }
   833         if (!fromcode || !*fromcode) {
   834             fromcode = "UTF-8";
   835         }
   836         cd = SDL_iconv_open(tocode, fromcode);
   837     }
   838     if (cd == (SDL_iconv_t) - 1) {
   839         return NULL;
   840     }
   841 
   842     stringsize = inbytesleft > 4 ? inbytesleft : 4;
   843     string = SDL_malloc(stringsize);
   844     if (!string) {
   845         SDL_iconv_close(cd);
   846         return NULL;
   847     }
   848     outbuf = string;
   849     outbytesleft = stringsize;
   850     SDL_memset(outbuf, 0, 4);
   851 
   852     while (inbytesleft > 0) {
   853         retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
   854         switch (retCode) {
   855         case SDL_ICONV_E2BIG:
   856             {
   857                 char *oldstring = string;
   858                 stringsize *= 2;
   859                 string = SDL_realloc(string, stringsize);
   860                 if (!string) {
   861                     SDL_iconv_close(cd);
   862                     return NULL;
   863                 }
   864                 outbuf = string + (outbuf - oldstring);
   865                 outbytesleft = stringsize - (outbuf - string);
   866                 SDL_memset(outbuf, 0, 4);
   867             }
   868             break;
   869         case SDL_ICONV_EILSEQ:
   870             /* Try skipping some input data - not perfect, but... */
   871             ++inbuf;
   872             --inbytesleft;
   873             break;
   874         case SDL_ICONV_EINVAL:
   875         case SDL_ICONV_ERROR:
   876             /* We can't continue... */
   877             inbytesleft = 0;
   878             break;
   879         }
   880     }
   881     SDL_iconv_close(cd);
   882 
   883     return string;
   884 }
   885 
   886 /* vi: set ts=4 sw=4 expandtab: */