src/stdlib/SDL_iconv.c
author Sam Lantinga <slouken@libsdl.org>
Sun, 02 Feb 2014 00:53:27 -0800
changeset 8149 681eb46b8ac4
parent 8093 b43765095a6f
child 8879 f6e4f24df1ac
permissions -rw-r--r--
Fixed bug 2374 - Update copyright for 2014...

Is it that time already??
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2014 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "../SDL_internal.h"
    22 
    23 /* This file contains portable iconv functions for SDL */
    24 
    25 #include "SDL_stdinc.h"
    26 #include "SDL_endian.h"
    27 
    28 #ifdef HAVE_ICONV
    29 
    30 /* Depending on which standard the iconv() was implemented with,
    31    iconv() may or may not use const char ** for the inbuf param.
    32    If we get this wrong, it's just a warning, so no big deal.
    33 */
    34 #if defined(_XGP6) || defined(__APPLE__) || \
    35     (defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2)))
    36 #define ICONV_INBUF_NONCONST
    37 #endif
    38 
    39 #include <errno.h>
    40 
    41 SDL_COMPILE_TIME_ASSERT(iconv_t, sizeof (iconv_t) <= sizeof (SDL_iconv_t));
    42 
    43 SDL_iconv_t
    44 SDL_iconv_open(const char *tocode, const char *fromcode)
    45 {
    46     return (SDL_iconv_t) ((size_t) iconv_open(tocode, fromcode));
    47 }
    48 
    49 int
    50 SDL_iconv_close(SDL_iconv_t cd)
    51 {
    52     return iconv_close((iconv_t) ((size_t) cd));
    53 }
    54 
    55 size_t
    56 SDL_iconv(SDL_iconv_t cd,
    57           const char **inbuf, size_t * inbytesleft,
    58           char **outbuf, size_t * outbytesleft)
    59 {
    60     size_t retCode;
    61 #ifdef ICONV_INBUF_NONCONST
    62     retCode = iconv((iconv_t) ((size_t) cd), (char **) inbuf, inbytesleft, outbuf, outbytesleft);
    63 #else
    64     retCode = iconv((iconv_t) ((size_t) cd), inbuf, inbytesleft, outbuf, outbytesleft);
    65 #endif
    66     if (retCode == (size_t) - 1) {
    67         switch (errno) {
    68         case E2BIG:
    69             return SDL_ICONV_E2BIG;
    70         case EILSEQ:
    71             return SDL_ICONV_EILSEQ;
    72         case EINVAL:
    73             return SDL_ICONV_EINVAL;
    74         default:
    75             return SDL_ICONV_ERROR;
    76         }
    77     }
    78     return retCode;
    79 }
    80 
    81 #else
    82 
    83 /* Lots of useful information on Unicode at:
    84 	http://www.cl.cam.ac.uk/~mgk25/unicode.html
    85 */
    86 
    87 #define UNICODE_BOM	0xFEFF
    88 
    89 #define UNKNOWN_ASCII	'?'
    90 #define UNKNOWN_UNICODE	0xFFFD
    91 
    92 enum
    93 {
    94     ENCODING_UNKNOWN,
    95     ENCODING_ASCII,
    96     ENCODING_LATIN1,
    97     ENCODING_UTF8,
    98     ENCODING_UTF16,             /* Needs byte order marker */
    99     ENCODING_UTF16BE,
   100     ENCODING_UTF16LE,
   101     ENCODING_UTF32,             /* Needs byte order marker */
   102     ENCODING_UTF32BE,
   103     ENCODING_UTF32LE,
   104     ENCODING_UCS2BE,
   105     ENCODING_UCS2LE,
   106     ENCODING_UCS4BE,
   107     ENCODING_UCS4LE,
   108 };
   109 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
   110 #define ENCODING_UTF16NATIVE	ENCODING_UTF16BE
   111 #define ENCODING_UTF32NATIVE	ENCODING_UTF32BE
   112 #define ENCODING_UCS2NATIVE     ENCODING_UCS2BE
   113 #define ENCODING_UCS4NATIVE     ENCODING_UCS4BE
   114 #else
   115 #define ENCODING_UTF16NATIVE	ENCODING_UTF16LE
   116 #define ENCODING_UTF32NATIVE	ENCODING_UTF32LE
   117 #define ENCODING_UCS2NATIVE     ENCODING_UCS2LE
   118 #define ENCODING_UCS4NATIVE     ENCODING_UCS4LE
   119 #endif
   120 
   121 struct _SDL_iconv_t
   122 {
   123     int src_fmt;
   124     int dst_fmt;
   125 };
   126 
   127 static struct
   128 {
   129     const char *name;
   130     int format;
   131 } encodings[] = {
   132 /* *INDENT-OFF* */
   133     { "ASCII", ENCODING_ASCII },
   134     { "US-ASCII", ENCODING_ASCII },
   135     { "8859-1", ENCODING_LATIN1 },
   136     { "ISO-8859-1", ENCODING_LATIN1 },
   137     { "UTF8", ENCODING_UTF8 },
   138     { "UTF-8", ENCODING_UTF8 },
   139     { "UTF16", ENCODING_UTF16 },
   140     { "UTF-16", ENCODING_UTF16 },
   141     { "UTF16BE", ENCODING_UTF16BE },
   142     { "UTF-16BE", ENCODING_UTF16BE },
   143     { "UTF16LE", ENCODING_UTF16LE },
   144     { "UTF-16LE", ENCODING_UTF16LE },
   145     { "UTF32", ENCODING_UTF32 },
   146     { "UTF-32", ENCODING_UTF32 },
   147     { "UTF32BE", ENCODING_UTF32BE },
   148     { "UTF-32BE", ENCODING_UTF32BE },
   149     { "UTF32LE", ENCODING_UTF32LE },
   150     { "UTF-32LE", ENCODING_UTF32LE },
   151     { "UCS2", ENCODING_UCS2BE },
   152     { "UCS-2", ENCODING_UCS2BE },
   153     { "UCS-2LE", ENCODING_UCS2LE },
   154     { "UCS-2BE", ENCODING_UCS2BE },
   155     { "UCS-2-INTERNAL", ENCODING_UCS2NATIVE },
   156     { "UCS4", ENCODING_UCS4BE },
   157     { "UCS-4", ENCODING_UCS4BE },
   158     { "UCS-4LE", ENCODING_UCS4LE },
   159     { "UCS-4BE", ENCODING_UCS4BE },
   160     { "UCS-4-INTERNAL", ENCODING_UCS4NATIVE },
   161 /* *INDENT-ON* */
   162 };
   163 
   164 static const char *
   165 getlocale(char *buffer, size_t bufsize)
   166 {
   167     const char *lang;
   168     char *ptr;
   169 
   170     lang = SDL_getenv("LC_ALL");
   171     if (!lang) {
   172         lang = SDL_getenv("LC_CTYPE");
   173     }
   174     if (!lang) {
   175         lang = SDL_getenv("LC_MESSAGES");
   176     }
   177     if (!lang) {
   178         lang = SDL_getenv("LANG");
   179     }
   180     if (!lang || !*lang || SDL_strcmp(lang, "C") == 0) {
   181         lang = "ASCII";
   182     }
   183 
   184     /* We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8" */
   185     ptr = SDL_strchr(lang, '.');
   186     if (ptr != NULL) {
   187         lang = ptr + 1;
   188     }
   189 
   190     SDL_strlcpy(buffer, lang, bufsize);
   191     ptr = SDL_strchr(buffer, '@');
   192     if (ptr != NULL) {
   193         *ptr = '\0';            /* chop end of string. */
   194     }
   195 
   196     return buffer;
   197 }
   198 
   199 SDL_iconv_t
   200 SDL_iconv_open(const char *tocode, const char *fromcode)
   201 {
   202     int src_fmt = ENCODING_UNKNOWN;
   203     int dst_fmt = ENCODING_UNKNOWN;
   204     int i;
   205     char fromcode_buffer[64];
   206     char tocode_buffer[64];
   207 
   208     if (!fromcode || !*fromcode) {
   209         fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
   210     }
   211     if (!tocode || !*tocode) {
   212         tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
   213     }
   214     for (i = 0; i < SDL_arraysize(encodings); ++i) {
   215         if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) {
   216             src_fmt = encodings[i].format;
   217             if (dst_fmt != ENCODING_UNKNOWN) {
   218                 break;
   219             }
   220         }
   221         if (SDL_strcasecmp(tocode, encodings[i].name) == 0) {
   222             dst_fmt = encodings[i].format;
   223             if (src_fmt != ENCODING_UNKNOWN) {
   224                 break;
   225             }
   226         }
   227     }
   228     if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) {
   229         SDL_iconv_t cd = (SDL_iconv_t) SDL_malloc(sizeof(*cd));
   230         if (cd) {
   231             cd->src_fmt = src_fmt;
   232             cd->dst_fmt = dst_fmt;
   233             return cd;
   234         }
   235     }
   236     return (SDL_iconv_t) - 1;
   237 }
   238 
   239 size_t
   240 SDL_iconv(SDL_iconv_t cd,
   241           const char **inbuf, size_t * inbytesleft,
   242           char **outbuf, size_t * outbytesleft)
   243 {
   244     /* For simplicity, we'll convert everything to and from UCS-4 */
   245     const char *src;
   246     char *dst;
   247     size_t srclen, dstlen;
   248     Uint32 ch = 0;
   249     size_t total;
   250 
   251     if (!inbuf || !*inbuf) {
   252         /* Reset the context */
   253         return 0;
   254     }
   255     if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
   256         return SDL_ICONV_E2BIG;
   257     }
   258     src = *inbuf;
   259     srclen = (inbytesleft ? *inbytesleft : 0);
   260     dst = *outbuf;
   261     dstlen = *outbytesleft;
   262 
   263     switch (cd->src_fmt) {
   264     case ENCODING_UTF16:
   265         /* Scan for a byte order marker */
   266         {
   267             Uint8 *p = (Uint8 *) src;
   268             size_t n = srclen / 2;
   269             while (n) {
   270                 if (p[0] == 0xFF && p[1] == 0xFE) {
   271                     cd->src_fmt = ENCODING_UTF16BE;
   272                     break;
   273                 } else if (p[0] == 0xFE && p[1] == 0xFF) {
   274                     cd->src_fmt = ENCODING_UTF16LE;
   275                     break;
   276                 }
   277                 p += 2;
   278                 --n;
   279             }
   280             if (n == 0) {
   281                 /* We can't tell, default to host order */
   282                 cd->src_fmt = ENCODING_UTF16NATIVE;
   283             }
   284         }
   285         break;
   286     case ENCODING_UTF32:
   287         /* Scan for a byte order marker */
   288         {
   289             Uint8 *p = (Uint8 *) src;
   290             size_t n = srclen / 4;
   291             while (n) {
   292                 if (p[0] == 0xFF && p[1] == 0xFE &&
   293                     p[2] == 0x00 && p[3] == 0x00) {
   294                     cd->src_fmt = ENCODING_UTF32BE;
   295                     break;
   296                 } else if (p[0] == 0x00 && p[1] == 0x00 &&
   297                            p[2] == 0xFE && p[3] == 0xFF) {
   298                     cd->src_fmt = ENCODING_UTF32LE;
   299                     break;
   300                 }
   301                 p += 4;
   302                 --n;
   303             }
   304             if (n == 0) {
   305                 /* We can't tell, default to host order */
   306                 cd->src_fmt = ENCODING_UTF32NATIVE;
   307             }
   308         }
   309         break;
   310     }
   311 
   312     switch (cd->dst_fmt) {
   313     case ENCODING_UTF16:
   314         /* Default to host order, need to add byte order marker */
   315         if (dstlen < 2) {
   316             return SDL_ICONV_E2BIG;
   317         }
   318         *(Uint16 *) dst = UNICODE_BOM;
   319         dst += 2;
   320         dstlen -= 2;
   321         cd->dst_fmt = ENCODING_UTF16NATIVE;
   322         break;
   323     case ENCODING_UTF32:
   324         /* Default to host order, need to add byte order marker */
   325         if (dstlen < 4) {
   326             return SDL_ICONV_E2BIG;
   327         }
   328         *(Uint32 *) dst = UNICODE_BOM;
   329         dst += 4;
   330         dstlen -= 4;
   331         cd->dst_fmt = ENCODING_UTF32NATIVE;
   332         break;
   333     }
   334 
   335     total = 0;
   336     while (srclen > 0) {
   337         /* Decode a character */
   338         switch (cd->src_fmt) {
   339         case ENCODING_ASCII:
   340             {
   341                 Uint8 *p = (Uint8 *) src;
   342                 ch = (Uint32) (p[0] & 0x7F);
   343                 ++src;
   344                 --srclen;
   345             }
   346             break;
   347         case ENCODING_LATIN1:
   348             {
   349                 Uint8 *p = (Uint8 *) src;
   350                 ch = (Uint32) p[0];
   351                 ++src;
   352                 --srclen;
   353             }
   354             break;
   355         case ENCODING_UTF8:    /* RFC 3629 */
   356             {
   357                 Uint8 *p = (Uint8 *) src;
   358                 size_t left = 0;
   359                 SDL_bool overlong = SDL_FALSE;
   360                 if (p[0] >= 0xFC) {
   361                     if ((p[0] & 0xFE) != 0xFC) {
   362                         /* Skip illegal sequences
   363                            return SDL_ICONV_EILSEQ;
   364                          */
   365                         ch = UNKNOWN_UNICODE;
   366                     } else {
   367                         if (p[0] == 0xFC && srclen > 1 && (p[1] & 0xFC) == 0x80) {
   368                             overlong = SDL_TRUE;
   369                         }
   370                         ch = (Uint32) (p[0] & 0x01);
   371                         left = 5;
   372                     }
   373                 } else if (p[0] >= 0xF8) {
   374                     if ((p[0] & 0xFC) != 0xF8) {
   375                         /* Skip illegal sequences
   376                            return SDL_ICONV_EILSEQ;
   377                          */
   378                         ch = UNKNOWN_UNICODE;
   379                     } else {
   380                         if (p[0] == 0xF8 && srclen > 1 && (p[1] & 0xF8) == 0x80) {
   381                             overlong = SDL_TRUE;
   382                         }
   383                         ch = (Uint32) (p[0] & 0x03);
   384                         left = 4;
   385                     }
   386                 } else if (p[0] >= 0xF0) {
   387                     if ((p[0] & 0xF8) != 0xF0) {
   388                         /* Skip illegal sequences
   389                            return SDL_ICONV_EILSEQ;
   390                          */
   391                         ch = UNKNOWN_UNICODE;
   392                     } else {
   393                         if (p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80) {
   394                             overlong = SDL_TRUE;
   395                         }
   396                         ch = (Uint32) (p[0] & 0x07);
   397                         left = 3;
   398                     }
   399                 } else if (p[0] >= 0xE0) {
   400                     if ((p[0] & 0xF0) != 0xE0) {
   401                         /* Skip illegal sequences
   402                            return SDL_ICONV_EILSEQ;
   403                          */
   404                         ch = UNKNOWN_UNICODE;
   405                     } else {
   406                         if (p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80) {
   407                             overlong = SDL_TRUE;
   408                         }
   409                         ch = (Uint32) (p[0] & 0x0F);
   410                         left = 2;
   411                     }
   412                 } else if (p[0] >= 0xC0) {
   413                     if ((p[0] & 0xE0) != 0xC0) {
   414                         /* Skip illegal sequences
   415                            return SDL_ICONV_EILSEQ;
   416                          */
   417                         ch = UNKNOWN_UNICODE;
   418                     } else {
   419                         if ((p[0] & 0xDE) == 0xC0) {
   420                             overlong = SDL_TRUE;
   421                         }
   422                         ch = (Uint32) (p[0] & 0x1F);
   423                         left = 1;
   424                     }
   425                 } else {
   426                     if ((p[0] & 0x80) != 0x00) {
   427                         /* Skip illegal sequences
   428                            return SDL_ICONV_EILSEQ;
   429                          */
   430                         ch = UNKNOWN_UNICODE;
   431                     } else {
   432                         ch = (Uint32) p[0];
   433                     }
   434                 }
   435                 ++src;
   436                 --srclen;
   437                 if (srclen < left) {
   438                     return SDL_ICONV_EINVAL;
   439                 }
   440                 while (left--) {
   441                     ++p;
   442                     if ((p[0] & 0xC0) != 0x80) {
   443                         /* Skip illegal sequences
   444                            return SDL_ICONV_EILSEQ;
   445                          */
   446                         ch = UNKNOWN_UNICODE;
   447                         break;
   448                     }
   449                     ch <<= 6;
   450                     ch |= (p[0] & 0x3F);
   451                     ++src;
   452                     --srclen;
   453                 }
   454                 if (overlong) {
   455                     /* Potential security risk
   456                        return SDL_ICONV_EILSEQ;
   457                      */
   458                     ch = UNKNOWN_UNICODE;
   459                 }
   460                 if ((ch >= 0xD800 && ch <= 0xDFFF) ||
   461                     (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
   462                     /* Skip illegal sequences
   463                        return SDL_ICONV_EILSEQ;
   464                      */
   465                     ch = UNKNOWN_UNICODE;
   466                 }
   467             }
   468             break;
   469         case ENCODING_UTF16BE: /* RFC 2781 */
   470             {
   471                 Uint8 *p = (Uint8 *) src;
   472                 Uint16 W1, W2;
   473                 if (srclen < 2) {
   474                     return SDL_ICONV_EINVAL;
   475                 }
   476                 W1 = ((Uint16) p[0] << 8) | (Uint16) p[1];
   477                 src += 2;
   478                 srclen -= 2;
   479                 if (W1 < 0xD800 || W1 > 0xDFFF) {
   480                     ch = (Uint32) W1;
   481                     break;
   482                 }
   483                 if (W1 > 0xDBFF) {
   484                     /* Skip illegal sequences
   485                        return SDL_ICONV_EILSEQ;
   486                      */
   487                     ch = UNKNOWN_UNICODE;
   488                     break;
   489                 }
   490                 if (srclen < 2) {
   491                     return SDL_ICONV_EINVAL;
   492                 }
   493                 p = (Uint8 *) src;
   494                 W2 = ((Uint16) p[0] << 8) | (Uint16) p[1];
   495                 src += 2;
   496                 srclen -= 2;
   497                 if (W2 < 0xDC00 || W2 > 0xDFFF) {
   498                     /* Skip illegal sequences
   499                        return SDL_ICONV_EILSEQ;
   500                      */
   501                     ch = UNKNOWN_UNICODE;
   502                     break;
   503                 }
   504                 ch = (((Uint32) (W1 & 0x3FF) << 10) |
   505                       (Uint32) (W2 & 0x3FF)) + 0x10000;
   506             }
   507             break;
   508         case ENCODING_UTF16LE: /* RFC 2781 */
   509             {
   510                 Uint8 *p = (Uint8 *) src;
   511                 Uint16 W1, W2;
   512                 if (srclen < 2) {
   513                     return SDL_ICONV_EINVAL;
   514                 }
   515                 W1 = ((Uint16) p[1] << 8) | (Uint16) p[0];
   516                 src += 2;
   517                 srclen -= 2;
   518                 if (W1 < 0xD800 || W1 > 0xDFFF) {
   519                     ch = (Uint32) W1;
   520                     break;
   521                 }
   522                 if (W1 > 0xDBFF) {
   523                     /* Skip illegal sequences
   524                        return SDL_ICONV_EILSEQ;
   525                      */
   526                     ch = UNKNOWN_UNICODE;
   527                     break;
   528                 }
   529                 if (srclen < 2) {
   530                     return SDL_ICONV_EINVAL;
   531                 }
   532                 p = (Uint8 *) src;
   533                 W2 = ((Uint16) p[1] << 8) | (Uint16) p[0];
   534                 src += 2;
   535                 srclen -= 2;
   536                 if (W2 < 0xDC00 || W2 > 0xDFFF) {
   537                     /* Skip illegal sequences
   538                        return SDL_ICONV_EILSEQ;
   539                      */
   540                     ch = UNKNOWN_UNICODE;
   541                     break;
   542                 }
   543                 ch = (((Uint32) (W1 & 0x3FF) << 10) |
   544                       (Uint32) (W2 & 0x3FF)) + 0x10000;
   545             }
   546             break;
   547         case ENCODING_UCS2LE:
   548             {
   549                 Uint8 *p = (Uint8 *) src;
   550                 if (srclen < 2) {
   551                     return SDL_ICONV_EINVAL;
   552                 }
   553                 ch = ((Uint32) p[1] << 8) | (Uint32) p[0];
   554                 src += 2;
   555                 srclen -= 2;
   556             }
   557             break;
   558         case ENCODING_UCS2BE:
   559             {
   560                 Uint8 *p = (Uint8 *) src;
   561                 if (srclen < 2) {
   562                     return SDL_ICONV_EINVAL;
   563                 }
   564                 ch = ((Uint32) p[0] << 8) | (Uint32) p[1];
   565                 src += 2;
   566                 srclen -= 2;
   567             }
   568             break;
   569         case ENCODING_UCS4BE:
   570         case ENCODING_UTF32BE:
   571             {
   572                 Uint8 *p = (Uint8 *) src;
   573                 if (srclen < 4) {
   574                     return SDL_ICONV_EINVAL;
   575                 }
   576                 ch = ((Uint32) p[0] << 24) |
   577                     ((Uint32) p[1] << 16) |
   578                     ((Uint32) p[2] << 8) | (Uint32) p[3];
   579                 src += 4;
   580                 srclen -= 4;
   581             }
   582             break;
   583         case ENCODING_UCS4LE:
   584         case ENCODING_UTF32LE:
   585             {
   586                 Uint8 *p = (Uint8 *) src;
   587                 if (srclen < 4) {
   588                     return SDL_ICONV_EINVAL;
   589                 }
   590                 ch = ((Uint32) p[3] << 24) |
   591                     ((Uint32) p[2] << 16) |
   592                     ((Uint32) p[1] << 8) | (Uint32) p[0];
   593                 src += 4;
   594                 srclen -= 4;
   595             }
   596             break;
   597         }
   598 
   599         /* Encode a character */
   600         switch (cd->dst_fmt) {
   601         case ENCODING_ASCII:
   602             {
   603                 Uint8 *p = (Uint8 *) dst;
   604                 if (dstlen < 1) {
   605                     return SDL_ICONV_E2BIG;
   606                 }
   607                 if (ch > 0x7F) {
   608                     *p = UNKNOWN_ASCII;
   609                 } else {
   610                     *p = (Uint8) ch;
   611                 }
   612                 ++dst;
   613                 --dstlen;
   614             }
   615             break;
   616         case ENCODING_LATIN1:
   617             {
   618                 Uint8 *p = (Uint8 *) dst;
   619                 if (dstlen < 1) {
   620                     return SDL_ICONV_E2BIG;
   621                 }
   622                 if (ch > 0xFF) {
   623                     *p = UNKNOWN_ASCII;
   624                 } else {
   625                     *p = (Uint8) ch;
   626                 }
   627                 ++dst;
   628                 --dstlen;
   629             }
   630             break;
   631         case ENCODING_UTF8:    /* RFC 3629 */
   632             {
   633                 Uint8 *p = (Uint8 *) dst;
   634                 if (ch > 0x10FFFF) {
   635                     ch = UNKNOWN_UNICODE;
   636                 }
   637                 if (ch <= 0x7F) {
   638                     if (dstlen < 1) {
   639                         return SDL_ICONV_E2BIG;
   640                     }
   641                     *p = (Uint8) ch;
   642                     ++dst;
   643                     --dstlen;
   644                 } else if (ch <= 0x7FF) {
   645                     if (dstlen < 2) {
   646                         return SDL_ICONV_E2BIG;
   647                     }
   648                     p[0] = 0xC0 | (Uint8) ((ch >> 6) & 0x1F);
   649                     p[1] = 0x80 | (Uint8) (ch & 0x3F);
   650                     dst += 2;
   651                     dstlen -= 2;
   652                 } else if (ch <= 0xFFFF) {
   653                     if (dstlen < 3) {
   654                         return SDL_ICONV_E2BIG;
   655                     }
   656                     p[0] = 0xE0 | (Uint8) ((ch >> 12) & 0x0F);
   657                     p[1] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   658                     p[2] = 0x80 | (Uint8) (ch & 0x3F);
   659                     dst += 3;
   660                     dstlen -= 3;
   661                 } else if (ch <= 0x1FFFFF) {
   662                     if (dstlen < 4) {
   663                         return SDL_ICONV_E2BIG;
   664                     }
   665                     p[0] = 0xF0 | (Uint8) ((ch >> 18) & 0x07);
   666                     p[1] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   667                     p[2] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   668                     p[3] = 0x80 | (Uint8) (ch & 0x3F);
   669                     dst += 4;
   670                     dstlen -= 4;
   671                 } else if (ch <= 0x3FFFFFF) {
   672                     if (dstlen < 5) {
   673                         return SDL_ICONV_E2BIG;
   674                     }
   675                     p[0] = 0xF8 | (Uint8) ((ch >> 24) & 0x03);
   676                     p[1] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
   677                     p[2] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   678                     p[3] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   679                     p[4] = 0x80 | (Uint8) (ch & 0x3F);
   680                     dst += 5;
   681                     dstlen -= 5;
   682                 } else {
   683                     if (dstlen < 6) {
   684                         return SDL_ICONV_E2BIG;
   685                     }
   686                     p[0] = 0xFC | (Uint8) ((ch >> 30) & 0x01);
   687                     p[1] = 0x80 | (Uint8) ((ch >> 24) & 0x3F);
   688                     p[2] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
   689                     p[3] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   690                     p[4] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   691                     p[5] = 0x80 | (Uint8) (ch & 0x3F);
   692                     dst += 6;
   693                     dstlen -= 6;
   694                 }
   695             }
   696             break;
   697         case ENCODING_UTF16BE: /* RFC 2781 */
   698             {
   699                 Uint8 *p = (Uint8 *) dst;
   700                 if (ch > 0x10FFFF) {
   701                     ch = UNKNOWN_UNICODE;
   702                 }
   703                 if (ch < 0x10000) {
   704                     if (dstlen < 2) {
   705                         return SDL_ICONV_E2BIG;
   706                     }
   707                     p[0] = (Uint8) (ch >> 8);
   708                     p[1] = (Uint8) ch;
   709                     dst += 2;
   710                     dstlen -= 2;
   711                 } else {
   712                     Uint16 W1, W2;
   713                     if (dstlen < 4) {
   714                         return SDL_ICONV_E2BIG;
   715                     }
   716                     ch = ch - 0x10000;
   717                     W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
   718                     W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
   719                     p[0] = (Uint8) (W1 >> 8);
   720                     p[1] = (Uint8) W1;
   721                     p[2] = (Uint8) (W2 >> 8);
   722                     p[3] = (Uint8) W2;
   723                     dst += 4;
   724                     dstlen -= 4;
   725                 }
   726             }
   727             break;
   728         case ENCODING_UTF16LE: /* RFC 2781 */
   729             {
   730                 Uint8 *p = (Uint8 *) dst;
   731                 if (ch > 0x10FFFF) {
   732                     ch = UNKNOWN_UNICODE;
   733                 }
   734                 if (ch < 0x10000) {
   735                     if (dstlen < 2) {
   736                         return SDL_ICONV_E2BIG;
   737                     }
   738                     p[1] = (Uint8) (ch >> 8);
   739                     p[0] = (Uint8) ch;
   740                     dst += 2;
   741                     dstlen -= 2;
   742                 } else {
   743                     Uint16 W1, W2;
   744                     if (dstlen < 4) {
   745                         return SDL_ICONV_E2BIG;
   746                     }
   747                     ch = ch - 0x10000;
   748                     W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
   749                     W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
   750                     p[1] = (Uint8) (W1 >> 8);
   751                     p[0] = (Uint8) W1;
   752                     p[3] = (Uint8) (W2 >> 8);
   753                     p[2] = (Uint8) W2;
   754                     dst += 4;
   755                     dstlen -= 4;
   756                 }
   757             }
   758             break;
   759         case ENCODING_UCS2BE:
   760             {
   761                 Uint8 *p = (Uint8 *) dst;
   762                 if (ch > 0xFFFF) {
   763                     ch = UNKNOWN_UNICODE;
   764                 }
   765                 if (dstlen < 2) {
   766                     return SDL_ICONV_E2BIG;
   767                 }
   768                 p[0] = (Uint8) (ch >> 8);
   769                 p[1] = (Uint8) ch;
   770                 dst += 2;
   771                 dstlen -= 2;
   772             }
   773             break;
   774         case ENCODING_UCS2LE:
   775             {
   776                 Uint8 *p = (Uint8 *) dst;
   777                 if (ch > 0xFFFF) {
   778                     ch = UNKNOWN_UNICODE;
   779                 }
   780                 if (dstlen < 2) {
   781                     return SDL_ICONV_E2BIG;
   782                 }
   783                 p[1] = (Uint8) (ch >> 8);
   784                 p[0] = (Uint8) ch;
   785                 dst += 2;
   786                 dstlen -= 2;
   787             }
   788             break;
   789         case ENCODING_UTF32BE:
   790             if (ch > 0x10FFFF) {
   791                 ch = UNKNOWN_UNICODE;
   792             }
   793         case ENCODING_UCS4BE:
   794             if (ch > 0x7FFFFFFF) {
   795                 ch = UNKNOWN_UNICODE;
   796             }
   797             {
   798                 Uint8 *p = (Uint8 *) dst;
   799                 if (dstlen < 4) {
   800                     return SDL_ICONV_E2BIG;
   801                 }
   802                 p[0] = (Uint8) (ch >> 24);
   803                 p[1] = (Uint8) (ch >> 16);
   804                 p[2] = (Uint8) (ch >> 8);
   805                 p[3] = (Uint8) ch;
   806                 dst += 4;
   807                 dstlen -= 4;
   808             }
   809             break;
   810         case ENCODING_UTF32LE:
   811             if (ch > 0x10FFFF) {
   812                 ch = UNKNOWN_UNICODE;
   813             }
   814         case ENCODING_UCS4LE:
   815             if (ch > 0x7FFFFFFF) {
   816                 ch = UNKNOWN_UNICODE;
   817             }
   818             {
   819                 Uint8 *p = (Uint8 *) dst;
   820                 if (dstlen < 4) {
   821                     return SDL_ICONV_E2BIG;
   822                 }
   823                 p[3] = (Uint8) (ch >> 24);
   824                 p[2] = (Uint8) (ch >> 16);
   825                 p[1] = (Uint8) (ch >> 8);
   826                 p[0] = (Uint8) ch;
   827                 dst += 4;
   828                 dstlen -= 4;
   829             }
   830             break;
   831         }
   832 
   833         /* Update state */
   834         *inbuf = src;
   835         *inbytesleft = srclen;
   836         *outbuf = dst;
   837         *outbytesleft = dstlen;
   838         ++total;
   839     }
   840     return total;
   841 }
   842 
   843 int
   844 SDL_iconv_close(SDL_iconv_t cd)
   845 {
   846     if (cd != (SDL_iconv_t)-1) {
   847         SDL_free(cd);
   848     }
   849     return 0;
   850 }
   851 
   852 #endif /* !HAVE_ICONV */
   853 
   854 char *
   855 SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf,
   856                  size_t inbytesleft)
   857 {
   858     SDL_iconv_t cd;
   859     char *string;
   860     size_t stringsize;
   861     char *outbuf;
   862     size_t outbytesleft;
   863     size_t retCode = 0;
   864 
   865     cd = SDL_iconv_open(tocode, fromcode);
   866     if (cd == (SDL_iconv_t) - 1) {
   867         /* See if we can recover here (fixes iconv on Solaris 11) */
   868         if (!tocode || !*tocode) {
   869             tocode = "UTF-8";
   870         }
   871         if (!fromcode || !*fromcode) {
   872             fromcode = "UTF-8";
   873         }
   874         cd = SDL_iconv_open(tocode, fromcode);
   875     }
   876     if (cd == (SDL_iconv_t) - 1) {
   877         return NULL;
   878     }
   879 
   880     stringsize = inbytesleft > 4 ? inbytesleft : 4;
   881     string = SDL_malloc(stringsize);
   882     if (!string) {
   883         SDL_iconv_close(cd);
   884         return NULL;
   885     }
   886     outbuf = string;
   887     outbytesleft = stringsize;
   888     SDL_memset(outbuf, 0, 4);
   889 
   890     while (inbytesleft > 0) {
   891         retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
   892         switch (retCode) {
   893         case SDL_ICONV_E2BIG:
   894             {
   895                 char *oldstring = string;
   896                 stringsize *= 2;
   897                 string = SDL_realloc(string, stringsize);
   898                 if (!string) {
   899                     SDL_iconv_close(cd);
   900                     return NULL;
   901                 }
   902                 outbuf = string + (outbuf - oldstring);
   903                 outbytesleft = stringsize - (outbuf - string);
   904                 SDL_memset(outbuf, 0, 4);
   905             }
   906             break;
   907         case SDL_ICONV_EILSEQ:
   908             /* Try skipping some input data - not perfect, but... */
   909             ++inbuf;
   910             --inbytesleft;
   911             break;
   912         case SDL_ICONV_EINVAL:
   913         case SDL_ICONV_ERROR:
   914             /* We can't continue... */
   915             inbytesleft = 0;
   916             break;
   917         }
   918     }
   919     SDL_iconv_close(cd);
   920 
   921     return string;
   922 }
   923 
   924 /* vi: set ts=4 sw=4 expandtab: */