src/stdlib/SDL_iconv.c
author Sam Lantinga <slouken@libsdl.org>
Mon, 10 Oct 2016 02:58:12 -0700
changeset 10504 9f8284fde4ea
parent 10481 42a679ff48e0
child 10644 594c17c446f6
permissions -rw-r--r--
Make sure we have iconv.h before building with it
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2016 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 
    22 #if defined(__clang_analyzer__) && !defined(SDL_DISABLE_ANALYZE_MACROS)
    23 #define SDL_DISABLE_ANALYZE_MACROS 1
    24 #endif
    25 
    26 #include "../SDL_internal.h"
    27 
    28 /* This file contains portable iconv functions for SDL */
    29 
    30 #include "SDL_stdinc.h"
    31 #include "SDL_endian.h"
    32 
    33 #if defined(HAVE_ICONV) && defined(HAVE_ICONV_H)
    34 #include <iconv.h>
    35 
/* Depending on which standard the iconv() was implemented with,
   iconv() may or may not use const char ** for the inbuf param.
   If we get this wrong, it's just a warning, so no big deal.

   ICONV_INBUF_NONCONST is defined for the platforms matched below; when it
   is defined, SDL_iconv() casts its inbuf argument to char ** before
   handing it to iconv().
*/
#if defined(_XGP6) || defined(__APPLE__) || \
    (defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2)) || \
    (defined(_NEWLIB_VERSION)))
#define ICONV_INBUF_NONCONST
#endif
    45 
    46 #include <errno.h>
    47 
/* The wrappers below carry the native iconv_t handle inside an SDL_iconv_t
   by casting through size_t, so the native handle must not be larger. */
SDL_COMPILE_TIME_ASSERT(iconv_t, sizeof (iconv_t) <= sizeof (SDL_iconv_t));
    49 
    50 SDL_iconv_t
    51 SDL_iconv_open(const char *tocode, const char *fromcode)
    52 {
    53     return (SDL_iconv_t) ((size_t) iconv_open(tocode, fromcode));
    54 }
    55 
    56 int
    57 SDL_iconv_close(SDL_iconv_t cd)
    58 {
    59     return iconv_close((iconv_t) ((size_t) cd));
    60 }
    61 
    62 size_t
    63 SDL_iconv(SDL_iconv_t cd,
    64           const char **inbuf, size_t * inbytesleft,
    65           char **outbuf, size_t * outbytesleft)
    66 {
    67     size_t retCode;
    68 #ifdef ICONV_INBUF_NONCONST
    69     retCode = iconv((iconv_t) ((size_t) cd), (char **) inbuf, inbytesleft, outbuf, outbytesleft);
    70 #else
    71     retCode = iconv((iconv_t) ((size_t) cd), inbuf, inbytesleft, outbuf, outbytesleft);
    72 #endif
    73     if (retCode == (size_t) - 1) {
    74         switch (errno) {
    75         case E2BIG:
    76             return SDL_ICONV_E2BIG;
    77         case EILSEQ:
    78             return SDL_ICONV_EILSEQ;
    79         case EINVAL:
    80             return SDL_ICONV_EINVAL;
    81         default:
    82             return SDL_ICONV_ERROR;
    83         }
    84     }
    85     return retCode;
    86 }
    87 
    88 #else
    89 
    90 /* Lots of useful information on Unicode at:
    91 	http://www.cl.cam.ac.uk/~mgk25/unicode.html
    92 */
    93 
    94 #define UNICODE_BOM	0xFEFF
    95 
    96 #define UNKNOWN_ASCII	'?'
    97 #define UNKNOWN_UNICODE	0xFFFD
    98 
/* Identifiers for the encodings handled by the built-in converter.  They are
   stored in the src_fmt/dst_fmt fields of the conversion descriptor and are
   the values found in the name lookup table.  ENCODING_UNKNOWN marks a name
   that was not found. */
enum
{
    ENCODING_UNKNOWN,
    ENCODING_ASCII,
    ENCODING_LATIN1,
    ENCODING_UTF8,
    ENCODING_UTF16,             /* Needs byte order marker */
    ENCODING_UTF16BE,
    ENCODING_UTF16LE,
    ENCODING_UTF32,             /* Needs byte order marker */
    ENCODING_UTF32BE,
    ENCODING_UTF32LE,
    ENCODING_UCS2BE,
    ENCODING_UCS2LE,
    ENCODING_UCS4BE,
    ENCODING_UCS4LE,
};
/* Aliases that resolve to the big- or little-endian variant of each
   multi-byte encoding according to the host byte order (SDL_BYTEORDER). */
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
#define ENCODING_UTF16NATIVE	ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE	ENCODING_UTF32BE
#define ENCODING_UCS2NATIVE     ENCODING_UCS2BE
#define ENCODING_UCS4NATIVE     ENCODING_UCS4BE
#else
#define ENCODING_UTF16NATIVE	ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE	ENCODING_UTF32LE
#define ENCODING_UCS2NATIVE     ENCODING_UCS2LE
#define ENCODING_UCS4NATIVE     ENCODING_UCS4LE
#endif
   127 
/* Conversion descriptor used by the built-in converter.  Both fields hold
   ENCODING_* values.  SDL_iconv() overwrites a generic ENCODING_UTF16 or
   ENCODING_UTF32 value with a concrete byte-order variant the first time
   it sees one. */
struct _SDL_iconv_t
{
    int src_fmt;                /* encoding of the input bytes */
    int dst_fmt;                /* encoding of the output bytes */
};
   133 
   134 static struct
   135 {
   136     const char *name;
   137     int format;
   138 } encodings[] = {
   139 /* *INDENT-OFF* */
   140     { "ASCII", ENCODING_ASCII },
   141     { "US-ASCII", ENCODING_ASCII },
   142     { "8859-1", ENCODING_LATIN1 },
   143     { "ISO-8859-1", ENCODING_LATIN1 },
   144     { "UTF8", ENCODING_UTF8 },
   145     { "UTF-8", ENCODING_UTF8 },
   146     { "UTF16", ENCODING_UTF16 },
   147     { "UTF-16", ENCODING_UTF16 },
   148     { "UTF16BE", ENCODING_UTF16BE },
   149     { "UTF-16BE", ENCODING_UTF16BE },
   150     { "UTF16LE", ENCODING_UTF16LE },
   151     { "UTF-16LE", ENCODING_UTF16LE },
   152     { "UTF32", ENCODING_UTF32 },
   153     { "UTF-32", ENCODING_UTF32 },
   154     { "UTF32BE", ENCODING_UTF32BE },
   155     { "UTF-32BE", ENCODING_UTF32BE },
   156     { "UTF32LE", ENCODING_UTF32LE },
   157     { "UTF-32LE", ENCODING_UTF32LE },
   158     { "UCS2", ENCODING_UCS2BE },
   159     { "UCS-2", ENCODING_UCS2BE },
   160     { "UCS-2LE", ENCODING_UCS2LE },
   161     { "UCS-2BE", ENCODING_UCS2BE },
   162     { "UCS-2-INTERNAL", ENCODING_UCS2NATIVE },
   163     { "UCS4", ENCODING_UCS4BE },
   164     { "UCS-4", ENCODING_UCS4BE },
   165     { "UCS-4LE", ENCODING_UCS4LE },
   166     { "UCS-4BE", ENCODING_UCS4BE },
   167     { "UCS-4-INTERNAL", ENCODING_UCS4NATIVE },
   168 /* *INDENT-ON* */
   169 };
   170 
   171 static const char *
   172 getlocale(char *buffer, size_t bufsize)
   173 {
   174     const char *lang;
   175     char *ptr;
   176 
   177     lang = SDL_getenv("LC_ALL");
   178     if (!lang) {
   179         lang = SDL_getenv("LC_CTYPE");
   180     }
   181     if (!lang) {
   182         lang = SDL_getenv("LC_MESSAGES");
   183     }
   184     if (!lang) {
   185         lang = SDL_getenv("LANG");
   186     }
   187     if (!lang || !*lang || SDL_strcmp(lang, "C") == 0) {
   188         lang = "ASCII";
   189     }
   190 
   191     /* We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8" */
   192     ptr = SDL_strchr(lang, '.');
   193     if (ptr != NULL) {
   194         lang = ptr + 1;
   195     }
   196 
   197     SDL_strlcpy(buffer, lang, bufsize);
   198     ptr = SDL_strchr(buffer, '@');
   199     if (ptr != NULL) {
   200         *ptr = '\0';            /* chop end of string. */
   201     }
   202 
   203     return buffer;
   204 }
   205 
   206 SDL_iconv_t
   207 SDL_iconv_open(const char *tocode, const char *fromcode)
   208 {
   209     int src_fmt = ENCODING_UNKNOWN;
   210     int dst_fmt = ENCODING_UNKNOWN;
   211     int i;
   212     char fromcode_buffer[64];
   213     char tocode_buffer[64];
   214 
   215     if (!fromcode || !*fromcode) {
   216         fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
   217     }
   218     if (!tocode || !*tocode) {
   219         tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
   220     }
   221     for (i = 0; i < SDL_arraysize(encodings); ++i) {
   222         if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) {
   223             src_fmt = encodings[i].format;
   224             if (dst_fmt != ENCODING_UNKNOWN) {
   225                 break;
   226             }
   227         }
   228         if (SDL_strcasecmp(tocode, encodings[i].name) == 0) {
   229             dst_fmt = encodings[i].format;
   230             if (src_fmt != ENCODING_UNKNOWN) {
   231                 break;
   232             }
   233         }
   234     }
   235     if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) {
   236         SDL_iconv_t cd = (SDL_iconv_t) SDL_malloc(sizeof(*cd));
   237         if (cd) {
   238             cd->src_fmt = src_fmt;
   239             cd->dst_fmt = dst_fmt;
   240             return cd;
   241         }
   242     }
   243     return (SDL_iconv_t) - 1;
   244 }
   245 
   246 size_t
   247 SDL_iconv(SDL_iconv_t cd,
   248           const char **inbuf, size_t * inbytesleft,
   249           char **outbuf, size_t * outbytesleft)
   250 {
   251     /* For simplicity, we'll convert everything to and from UCS-4 */
   252     const char *src;
   253     char *dst;
   254     size_t srclen, dstlen;
   255     Uint32 ch = 0;
   256     size_t total;
   257 
   258     if (!inbuf || !*inbuf) {
   259         /* Reset the context */
   260         return 0;
   261     }
   262     if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
   263         return SDL_ICONV_E2BIG;
   264     }
   265     src = *inbuf;
   266     srclen = (inbytesleft ? *inbytesleft : 0);
   267     dst = *outbuf;
   268     dstlen = *outbytesleft;
   269 
   270     switch (cd->src_fmt) {
   271     case ENCODING_UTF16:
   272         /* Scan for a byte order marker */
   273         {
   274             Uint8 *p = (Uint8 *) src;
   275             size_t n = srclen / 2;
   276             while (n) {
   277                 if (p[0] == 0xFF && p[1] == 0xFE) {
   278                     cd->src_fmt = ENCODING_UTF16BE;
   279                     break;
   280                 } else if (p[0] == 0xFE && p[1] == 0xFF) {
   281                     cd->src_fmt = ENCODING_UTF16LE;
   282                     break;
   283                 }
   284                 p += 2;
   285                 --n;
   286             }
   287             if (n == 0) {
   288                 /* We can't tell, default to host order */
   289                 cd->src_fmt = ENCODING_UTF16NATIVE;
   290             }
   291         }
   292         break;
   293     case ENCODING_UTF32:
   294         /* Scan for a byte order marker */
   295         {
   296             Uint8 *p = (Uint8 *) src;
   297             size_t n = srclen / 4;
   298             while (n) {
   299                 if (p[0] == 0xFF && p[1] == 0xFE &&
   300                     p[2] == 0x00 && p[3] == 0x00) {
   301                     cd->src_fmt = ENCODING_UTF32BE;
   302                     break;
   303                 } else if (p[0] == 0x00 && p[1] == 0x00 &&
   304                            p[2] == 0xFE && p[3] == 0xFF) {
   305                     cd->src_fmt = ENCODING_UTF32LE;
   306                     break;
   307                 }
   308                 p += 4;
   309                 --n;
   310             }
   311             if (n == 0) {
   312                 /* We can't tell, default to host order */
   313                 cd->src_fmt = ENCODING_UTF32NATIVE;
   314             }
   315         }
   316         break;
   317     }
   318 
   319     switch (cd->dst_fmt) {
   320     case ENCODING_UTF16:
   321         /* Default to host order, need to add byte order marker */
   322         if (dstlen < 2) {
   323             return SDL_ICONV_E2BIG;
   324         }
   325         *(Uint16 *) dst = UNICODE_BOM;
   326         dst += 2;
   327         dstlen -= 2;
   328         cd->dst_fmt = ENCODING_UTF16NATIVE;
   329         break;
   330     case ENCODING_UTF32:
   331         /* Default to host order, need to add byte order marker */
   332         if (dstlen < 4) {
   333             return SDL_ICONV_E2BIG;
   334         }
   335         *(Uint32 *) dst = UNICODE_BOM;
   336         dst += 4;
   337         dstlen -= 4;
   338         cd->dst_fmt = ENCODING_UTF32NATIVE;
   339         break;
   340     }
   341 
   342     total = 0;
   343     while (srclen > 0) {
   344         /* Decode a character */
   345         switch (cd->src_fmt) {
   346         case ENCODING_ASCII:
   347             {
   348                 Uint8 *p = (Uint8 *) src;
   349                 ch = (Uint32) (p[0] & 0x7F);
   350                 ++src;
   351                 --srclen;
   352             }
   353             break;
   354         case ENCODING_LATIN1:
   355             {
   356                 Uint8 *p = (Uint8 *) src;
   357                 ch = (Uint32) p[0];
   358                 ++src;
   359                 --srclen;
   360             }
   361             break;
   362         case ENCODING_UTF8:    /* RFC 3629 */
   363             {
   364                 Uint8 *p = (Uint8 *) src;
   365                 size_t left = 0;
   366                 SDL_bool overlong = SDL_FALSE;
   367                 if (p[0] >= 0xFC) {
   368                     if ((p[0] & 0xFE) != 0xFC) {
   369                         /* Skip illegal sequences
   370                            return SDL_ICONV_EILSEQ;
   371                          */
   372                         ch = UNKNOWN_UNICODE;
   373                     } else {
   374                         if (p[0] == 0xFC && srclen > 1 && (p[1] & 0xFC) == 0x80) {
   375                             overlong = SDL_TRUE;
   376                         }
   377                         ch = (Uint32) (p[0] & 0x01);
   378                         left = 5;
   379                     }
   380                 } else if (p[0] >= 0xF8) {
   381                     if ((p[0] & 0xFC) != 0xF8) {
   382                         /* Skip illegal sequences
   383                            return SDL_ICONV_EILSEQ;
   384                          */
   385                         ch = UNKNOWN_UNICODE;
   386                     } else {
   387                         if (p[0] == 0xF8 && srclen > 1 && (p[1] & 0xF8) == 0x80) {
   388                             overlong = SDL_TRUE;
   389                         }
   390                         ch = (Uint32) (p[0] & 0x03);
   391                         left = 4;
   392                     }
   393                 } else if (p[0] >= 0xF0) {
   394                     if ((p[0] & 0xF8) != 0xF0) {
   395                         /* Skip illegal sequences
   396                            return SDL_ICONV_EILSEQ;
   397                          */
   398                         ch = UNKNOWN_UNICODE;
   399                     } else {
   400                         if (p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80) {
   401                             overlong = SDL_TRUE;
   402                         }
   403                         ch = (Uint32) (p[0] & 0x07);
   404                         left = 3;
   405                     }
   406                 } else if (p[0] >= 0xE0) {
   407                     if ((p[0] & 0xF0) != 0xE0) {
   408                         /* Skip illegal sequences
   409                            return SDL_ICONV_EILSEQ;
   410                          */
   411                         ch = UNKNOWN_UNICODE;
   412                     } else {
   413                         if (p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80) {
   414                             overlong = SDL_TRUE;
   415                         }
   416                         ch = (Uint32) (p[0] & 0x0F);
   417                         left = 2;
   418                     }
   419                 } else if (p[0] >= 0xC0) {
   420                     if ((p[0] & 0xE0) != 0xC0) {
   421                         /* Skip illegal sequences
   422                            return SDL_ICONV_EILSEQ;
   423                          */
   424                         ch = UNKNOWN_UNICODE;
   425                     } else {
   426                         if ((p[0] & 0xDE) == 0xC0) {
   427                             overlong = SDL_TRUE;
   428                         }
   429                         ch = (Uint32) (p[0] & 0x1F);
   430                         left = 1;
   431                     }
   432                 } else {
   433                     if ((p[0] & 0x80) != 0x00) {
   434                         /* Skip illegal sequences
   435                            return SDL_ICONV_EILSEQ;
   436                          */
   437                         ch = UNKNOWN_UNICODE;
   438                     } else {
   439                         ch = (Uint32) p[0];
   440                     }
   441                 }
   442                 ++src;
   443                 --srclen;
   444                 if (srclen < left) {
   445                     return SDL_ICONV_EINVAL;
   446                 }
   447                 while (left--) {
   448                     ++p;
   449                     if ((p[0] & 0xC0) != 0x80) {
   450                         /* Skip illegal sequences
   451                            return SDL_ICONV_EILSEQ;
   452                          */
   453                         ch = UNKNOWN_UNICODE;
   454                         break;
   455                     }
   456                     ch <<= 6;
   457                     ch |= (p[0] & 0x3F);
   458                     ++src;
   459                     --srclen;
   460                 }
   461                 if (overlong) {
   462                     /* Potential security risk
   463                        return SDL_ICONV_EILSEQ;
   464                      */
   465                     ch = UNKNOWN_UNICODE;
   466                 }
   467                 if ((ch >= 0xD800 && ch <= 0xDFFF) ||
   468                     (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
   469                     /* Skip illegal sequences
   470                        return SDL_ICONV_EILSEQ;
   471                      */
   472                     ch = UNKNOWN_UNICODE;
   473                 }
   474             }
   475             break;
   476         case ENCODING_UTF16BE: /* RFC 2781 */
   477             {
   478                 Uint8 *p = (Uint8 *) src;
   479                 Uint16 W1, W2;
   480                 if (srclen < 2) {
   481                     return SDL_ICONV_EINVAL;
   482                 }
   483                 W1 = ((Uint16) p[0] << 8) | (Uint16) p[1];
   484                 src += 2;
   485                 srclen -= 2;
   486                 if (W1 < 0xD800 || W1 > 0xDFFF) {
   487                     ch = (Uint32) W1;
   488                     break;
   489                 }
   490                 if (W1 > 0xDBFF) {
   491                     /* Skip illegal sequences
   492                        return SDL_ICONV_EILSEQ;
   493                      */
   494                     ch = UNKNOWN_UNICODE;
   495                     break;
   496                 }
   497                 if (srclen < 2) {
   498                     return SDL_ICONV_EINVAL;
   499                 }
   500                 p = (Uint8 *) src;
   501                 W2 = ((Uint16) p[0] << 8) | (Uint16) p[1];
   502                 src += 2;
   503                 srclen -= 2;
   504                 if (W2 < 0xDC00 || W2 > 0xDFFF) {
   505                     /* Skip illegal sequences
   506                        return SDL_ICONV_EILSEQ;
   507                      */
   508                     ch = UNKNOWN_UNICODE;
   509                     break;
   510                 }
   511                 ch = (((Uint32) (W1 & 0x3FF) << 10) |
   512                       (Uint32) (W2 & 0x3FF)) + 0x10000;
   513             }
   514             break;
   515         case ENCODING_UTF16LE: /* RFC 2781 */
   516             {
   517                 Uint8 *p = (Uint8 *) src;
   518                 Uint16 W1, W2;
   519                 if (srclen < 2) {
   520                     return SDL_ICONV_EINVAL;
   521                 }
   522                 W1 = ((Uint16) p[1] << 8) | (Uint16) p[0];
   523                 src += 2;
   524                 srclen -= 2;
   525                 if (W1 < 0xD800 || W1 > 0xDFFF) {
   526                     ch = (Uint32) W1;
   527                     break;
   528                 }
   529                 if (W1 > 0xDBFF) {
   530                     /* Skip illegal sequences
   531                        return SDL_ICONV_EILSEQ;
   532                      */
   533                     ch = UNKNOWN_UNICODE;
   534                     break;
   535                 }
   536                 if (srclen < 2) {
   537                     return SDL_ICONV_EINVAL;
   538                 }
   539                 p = (Uint8 *) src;
   540                 W2 = ((Uint16) p[1] << 8) | (Uint16) p[0];
   541                 src += 2;
   542                 srclen -= 2;
   543                 if (W2 < 0xDC00 || W2 > 0xDFFF) {
   544                     /* Skip illegal sequences
   545                        return SDL_ICONV_EILSEQ;
   546                      */
   547                     ch = UNKNOWN_UNICODE;
   548                     break;
   549                 }
   550                 ch = (((Uint32) (W1 & 0x3FF) << 10) |
   551                       (Uint32) (W2 & 0x3FF)) + 0x10000;
   552             }
   553             break;
   554         case ENCODING_UCS2LE:
   555             {
   556                 Uint8 *p = (Uint8 *) src;
   557                 if (srclen < 2) {
   558                     return SDL_ICONV_EINVAL;
   559                 }
   560                 ch = ((Uint32) p[1] << 8) | (Uint32) p[0];
   561                 src += 2;
   562                 srclen -= 2;
   563             }
   564             break;
   565         case ENCODING_UCS2BE:
   566             {
   567                 Uint8 *p = (Uint8 *) src;
   568                 if (srclen < 2) {
   569                     return SDL_ICONV_EINVAL;
   570                 }
   571                 ch = ((Uint32) p[0] << 8) | (Uint32) p[1];
   572                 src += 2;
   573                 srclen -= 2;
   574             }
   575             break;
   576         case ENCODING_UCS4BE:
   577         case ENCODING_UTF32BE:
   578             {
   579                 Uint8 *p = (Uint8 *) src;
   580                 if (srclen < 4) {
   581                     return SDL_ICONV_EINVAL;
   582                 }
   583                 ch = ((Uint32) p[0] << 24) |
   584                     ((Uint32) p[1] << 16) |
   585                     ((Uint32) p[2] << 8) | (Uint32) p[3];
   586                 src += 4;
   587                 srclen -= 4;
   588             }
   589             break;
   590         case ENCODING_UCS4LE:
   591         case ENCODING_UTF32LE:
   592             {
   593                 Uint8 *p = (Uint8 *) src;
   594                 if (srclen < 4) {
   595                     return SDL_ICONV_EINVAL;
   596                 }
   597                 ch = ((Uint32) p[3] << 24) |
   598                     ((Uint32) p[2] << 16) |
   599                     ((Uint32) p[1] << 8) | (Uint32) p[0];
   600                 src += 4;
   601                 srclen -= 4;
   602             }
   603             break;
   604         }
   605 
   606         /* Encode a character */
   607         switch (cd->dst_fmt) {
   608         case ENCODING_ASCII:
   609             {
   610                 Uint8 *p = (Uint8 *) dst;
   611                 if (dstlen < 1) {
   612                     return SDL_ICONV_E2BIG;
   613                 }
   614                 if (ch > 0x7F) {
   615                     *p = UNKNOWN_ASCII;
   616                 } else {
   617                     *p = (Uint8) ch;
   618                 }
   619                 ++dst;
   620                 --dstlen;
   621             }
   622             break;
   623         case ENCODING_LATIN1:
   624             {
   625                 Uint8 *p = (Uint8 *) dst;
   626                 if (dstlen < 1) {
   627                     return SDL_ICONV_E2BIG;
   628                 }
   629                 if (ch > 0xFF) {
   630                     *p = UNKNOWN_ASCII;
   631                 } else {
   632                     *p = (Uint8) ch;
   633                 }
   634                 ++dst;
   635                 --dstlen;
   636             }
   637             break;
   638         case ENCODING_UTF8:    /* RFC 3629 */
   639             {
   640                 Uint8 *p = (Uint8 *) dst;
   641                 if (ch > 0x10FFFF) {
   642                     ch = UNKNOWN_UNICODE;
   643                 }
   644                 if (ch <= 0x7F) {
   645                     if (dstlen < 1) {
   646                         return SDL_ICONV_E2BIG;
   647                     }
   648                     *p = (Uint8) ch;
   649                     ++dst;
   650                     --dstlen;
   651                 } else if (ch <= 0x7FF) {
   652                     if (dstlen < 2) {
   653                         return SDL_ICONV_E2BIG;
   654                     }
   655                     p[0] = 0xC0 | (Uint8) ((ch >> 6) & 0x1F);
   656                     p[1] = 0x80 | (Uint8) (ch & 0x3F);
   657                     dst += 2;
   658                     dstlen -= 2;
   659                 } else if (ch <= 0xFFFF) {
   660                     if (dstlen < 3) {
   661                         return SDL_ICONV_E2BIG;
   662                     }
   663                     p[0] = 0xE0 | (Uint8) ((ch >> 12) & 0x0F);
   664                     p[1] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   665                     p[2] = 0x80 | (Uint8) (ch & 0x3F);
   666                     dst += 3;
   667                     dstlen -= 3;
   668                 } else if (ch <= 0x1FFFFF) {
   669                     if (dstlen < 4) {
   670                         return SDL_ICONV_E2BIG;
   671                     }
   672                     p[0] = 0xF0 | (Uint8) ((ch >> 18) & 0x07);
   673                     p[1] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   674                     p[2] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   675                     p[3] = 0x80 | (Uint8) (ch & 0x3F);
   676                     dst += 4;
   677                     dstlen -= 4;
   678                 } else if (ch <= 0x3FFFFFF) {
   679                     if (dstlen < 5) {
   680                         return SDL_ICONV_E2BIG;
   681                     }
   682                     p[0] = 0xF8 | (Uint8) ((ch >> 24) & 0x03);
   683                     p[1] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
   684                     p[2] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   685                     p[3] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   686                     p[4] = 0x80 | (Uint8) (ch & 0x3F);
   687                     dst += 5;
   688                     dstlen -= 5;
   689                 } else {
   690                     if (dstlen < 6) {
   691                         return SDL_ICONV_E2BIG;
   692                     }
   693                     p[0] = 0xFC | (Uint8) ((ch >> 30) & 0x01);
   694                     p[1] = 0x80 | (Uint8) ((ch >> 24) & 0x3F);
   695                     p[2] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
   696                     p[3] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   697                     p[4] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   698                     p[5] = 0x80 | (Uint8) (ch & 0x3F);
   699                     dst += 6;
   700                     dstlen -= 6;
   701                 }
   702             }
   703             break;
   704         case ENCODING_UTF16BE: /* RFC 2781 */
   705             {
   706                 Uint8 *p = (Uint8 *) dst;
   707                 if (ch > 0x10FFFF) {
   708                     ch = UNKNOWN_UNICODE;
   709                 }
   710                 if (ch < 0x10000) {
   711                     if (dstlen < 2) {
   712                         return SDL_ICONV_E2BIG;
   713                     }
   714                     p[0] = (Uint8) (ch >> 8);
   715                     p[1] = (Uint8) ch;
   716                     dst += 2;
   717                     dstlen -= 2;
   718                 } else {
   719                     Uint16 W1, W2;
   720                     if (dstlen < 4) {
   721                         return SDL_ICONV_E2BIG;
   722                     }
   723                     ch = ch - 0x10000;
   724                     W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
   725                     W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
   726                     p[0] = (Uint8) (W1 >> 8);
   727                     p[1] = (Uint8) W1;
   728                     p[2] = (Uint8) (W2 >> 8);
   729                     p[3] = (Uint8) W2;
   730                     dst += 4;
   731                     dstlen -= 4;
   732                 }
   733             }
   734             break;
   735         case ENCODING_UTF16LE: /* RFC 2781 */
   736             {
   737                 Uint8 *p = (Uint8 *) dst;
   738                 if (ch > 0x10FFFF) {
   739                     ch = UNKNOWN_UNICODE;
   740                 }
   741                 if (ch < 0x10000) {
   742                     if (dstlen < 2) {
   743                         return SDL_ICONV_E2BIG;
   744                     }
   745                     p[1] = (Uint8) (ch >> 8);
   746                     p[0] = (Uint8) ch;
   747                     dst += 2;
   748                     dstlen -= 2;
   749                 } else {
   750                     Uint16 W1, W2;
   751                     if (dstlen < 4) {
   752                         return SDL_ICONV_E2BIG;
   753                     }
   754                     ch = ch - 0x10000;
   755                     W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
   756                     W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
   757                     p[1] = (Uint8) (W1 >> 8);
   758                     p[0] = (Uint8) W1;
   759                     p[3] = (Uint8) (W2 >> 8);
   760                     p[2] = (Uint8) W2;
   761                     dst += 4;
   762                     dstlen -= 4;
   763                 }
   764             }
   765             break;
   766         case ENCODING_UCS2BE:
   767             {
   768                 Uint8 *p = (Uint8 *) dst;
   769                 if (ch > 0xFFFF) {
   770                     ch = UNKNOWN_UNICODE;
   771                 }
   772                 if (dstlen < 2) {
   773                     return SDL_ICONV_E2BIG;
   774                 }
   775                 p[0] = (Uint8) (ch >> 8);
   776                 p[1] = (Uint8) ch;
   777                 dst += 2;
   778                 dstlen -= 2;
   779             }
   780             break;
   781         case ENCODING_UCS2LE:
   782             {
   783                 Uint8 *p = (Uint8 *) dst;
   784                 if (ch > 0xFFFF) {
   785                     ch = UNKNOWN_UNICODE;
   786                 }
   787                 if (dstlen < 2) {
   788                     return SDL_ICONV_E2BIG;
   789                 }
   790                 p[1] = (Uint8) (ch >> 8);
   791                 p[0] = (Uint8) ch;
   792                 dst += 2;
   793                 dstlen -= 2;
   794             }
   795             break;
   796         case ENCODING_UTF32BE:
   797             if (ch > 0x10FFFF) {
   798                 ch = UNKNOWN_UNICODE;
   799             }
   800         case ENCODING_UCS4BE:
   801             if (ch > 0x7FFFFFFF) {
   802                 ch = UNKNOWN_UNICODE;
   803             }
   804             {
   805                 Uint8 *p = (Uint8 *) dst;
   806                 if (dstlen < 4) {
   807                     return SDL_ICONV_E2BIG;
   808                 }
   809                 p[0] = (Uint8) (ch >> 24);
   810                 p[1] = (Uint8) (ch >> 16);
   811                 p[2] = (Uint8) (ch >> 8);
   812                 p[3] = (Uint8) ch;
   813                 dst += 4;
   814                 dstlen -= 4;
   815             }
   816             break;
   817         case ENCODING_UTF32LE:
   818             if (ch > 0x10FFFF) {
   819                 ch = UNKNOWN_UNICODE;
   820             }
   821         case ENCODING_UCS4LE:
   822             if (ch > 0x7FFFFFFF) {
   823                 ch = UNKNOWN_UNICODE;
   824             }
   825             {
   826                 Uint8 *p = (Uint8 *) dst;
   827                 if (dstlen < 4) {
   828                     return SDL_ICONV_E2BIG;
   829                 }
   830                 p[3] = (Uint8) (ch >> 24);
   831                 p[2] = (Uint8) (ch >> 16);
   832                 p[1] = (Uint8) (ch >> 8);
   833                 p[0] = (Uint8) ch;
   834                 dst += 4;
   835                 dstlen -= 4;
   836             }
   837             break;
   838         }
   839 
   840         /* Update state */
   841         *inbuf = src;
   842         *inbytesleft = srclen;
   843         *outbuf = dst;
   844         *outbytesleft = dstlen;
   845         ++total;
   846     }
   847     return total;
   848 }
   849 
   850 int
   851 SDL_iconv_close(SDL_iconv_t cd)
   852 {
   853     if (cd != (SDL_iconv_t)-1) {
   854         SDL_free(cd);
   855     }
   856     return 0;
   857 }
   858 
   859 #endif /* !HAVE_ICONV */
   860 
   861 char *
   862 SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf,
   863                  size_t inbytesleft)
   864 {
   865     SDL_iconv_t cd;
   866     char *string;
   867     size_t stringsize;
   868     char *outbuf;
   869     size_t outbytesleft;
   870     size_t retCode = 0;
   871 
   872     cd = SDL_iconv_open(tocode, fromcode);
   873     if (cd == (SDL_iconv_t) - 1) {
   874         /* See if we can recover here (fixes iconv on Solaris 11) */
   875         if (!tocode || !*tocode) {
   876             tocode = "UTF-8";
   877         }
   878         if (!fromcode || !*fromcode) {
   879             fromcode = "UTF-8";
   880         }
   881         cd = SDL_iconv_open(tocode, fromcode);
   882     }
   883     if (cd == (SDL_iconv_t) - 1) {
   884         return NULL;
   885     }
   886 
   887     stringsize = inbytesleft > 4 ? inbytesleft : 4;
   888     string = SDL_malloc(stringsize);
   889     if (!string) {
   890         SDL_iconv_close(cd);
   891         return NULL;
   892     }
   893     outbuf = string;
   894     outbytesleft = stringsize;
   895     SDL_memset(outbuf, 0, 4);
   896 
   897     while (inbytesleft > 0) {
   898         retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
   899         switch (retCode) {
   900         case SDL_ICONV_E2BIG:
   901             {
   902                 char *oldstring = string;
   903                 stringsize *= 2;
   904                 string = SDL_realloc(string, stringsize);
   905                 if (!string) {
   906                     SDL_iconv_close(cd);
   907                     return NULL;
   908                 }
   909                 outbuf = string + (outbuf - oldstring);
   910                 outbytesleft = stringsize - (outbuf - string);
   911                 SDL_memset(outbuf, 0, 4);
   912             }
   913             break;
   914         case SDL_ICONV_EILSEQ:
   915             /* Try skipping some input data - not perfect, but... */
   916             ++inbuf;
   917             --inbytesleft;
   918             break;
   919         case SDL_ICONV_EINVAL:
   920         case SDL_ICONV_ERROR:
   921             /* We can't continue... */
   922             inbytesleft = 0;
   923             break;
   924         }
   925     }
   926     SDL_iconv_close(cd);
   927 
   928     return string;
   929 }
   930 
   931 /* vi: set ts=4 sw=4 expandtab: */