src/stdlib/SDL_iconv.c
author Sylvain Becker <sylvain.becker@gmail.com>
Mon, 24 Feb 2020 21:57:03 +0100
changeset 13551 4efb3eb7a3b3
parent 13422 fd6a12de91c7
permissions -rw-r--r--
Better fix to set the palette opaque, when there is also a colorkey
(see bug 3827)
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 
    22 #if defined(__clang_analyzer__) && !defined(SDL_DISABLE_ANALYZE_MACROS)
    23 #define SDL_DISABLE_ANALYZE_MACROS 1
    24 #endif
    25 
    26 #include "../SDL_internal.h"
    27 
    28 /* This file contains portable iconv functions for SDL */
    29 
    30 #include "SDL_stdinc.h"
    31 #include "SDL_endian.h"
    32 
    33 #if defined(HAVE_ICONV) && defined(HAVE_ICONV_H)
    34 #include <iconv.h>
    35 
    36 /* Depending on which standard the iconv() was implemented with,
    37    iconv() may or may not use const char ** for the inbuf param.
    38    If we get this wrong, it's just a warning, so no big deal.
    39 */
    40 #if defined(_XGP6) || defined(__APPLE__) || defined(__RISCOS__) || \
    41     defined(__EMSCRIPTEN__) || \
    42     (defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2)) || \
    43     (defined(_NEWLIB_VERSION)))
    44 #define ICONV_INBUF_NONCONST
    45 #endif
    46 
    47 #include <errno.h>
    48 
    49 SDL_COMPILE_TIME_ASSERT(iconv_t, sizeof (iconv_t) <= sizeof (SDL_iconv_t));
    50 
    51 SDL_iconv_t
    52 SDL_iconv_open(const char *tocode, const char *fromcode)
    53 {
    54     return (SDL_iconv_t) ((size_t) iconv_open(tocode, fromcode));
    55 }
    56 
    57 int
    58 SDL_iconv_close(SDL_iconv_t cd)
    59 {
    60     return iconv_close((iconv_t) ((size_t) cd));
    61 }
    62 
    63 size_t
    64 SDL_iconv(SDL_iconv_t cd,
    65           const char **inbuf, size_t * inbytesleft,
    66           char **outbuf, size_t * outbytesleft)
    67 {
    68     size_t retCode;
    69 #ifdef ICONV_INBUF_NONCONST
    70     retCode = iconv((iconv_t) ((size_t) cd), (char **) inbuf, inbytesleft, outbuf, outbytesleft);
    71 #else
    72     retCode = iconv((iconv_t) ((size_t) cd), inbuf, inbytesleft, outbuf, outbytesleft);
    73 #endif
    74     if (retCode == (size_t) - 1) {
    75         switch (errno) {
    76         case E2BIG:
    77             return SDL_ICONV_E2BIG;
    78         case EILSEQ:
    79             return SDL_ICONV_EILSEQ;
    80         case EINVAL:
    81             return SDL_ICONV_EINVAL;
    82         default:
    83             return SDL_ICONV_ERROR;
    84         }
    85     }
    86     return retCode;
    87 }
    88 
    89 #else
    90 
    91 /* Lots of useful information on Unicode at:
    92     http://www.cl.cam.ac.uk/~mgk25/unicode.html
    93 */
    94 
    95 #define UNICODE_BOM    0xFEFF
    96 
    97 #define UNKNOWN_ASCII    '?'
    98 #define UNKNOWN_UNICODE    0xFFFD
    99 
   100 enum
   101 {
   102     ENCODING_UNKNOWN,
   103     ENCODING_ASCII,
   104     ENCODING_LATIN1,
   105     ENCODING_UTF8,
   106     ENCODING_UTF16,             /* Needs byte order marker */
   107     ENCODING_UTF16BE,
   108     ENCODING_UTF16LE,
   109     ENCODING_UTF32,             /* Needs byte order marker */
   110     ENCODING_UTF32BE,
   111     ENCODING_UTF32LE,
   112     ENCODING_UCS2BE,
   113     ENCODING_UCS2LE,
   114     ENCODING_UCS4BE,
   115     ENCODING_UCS4LE,
   116 };
   117 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
   118 #define ENCODING_UTF16NATIVE    ENCODING_UTF16BE
   119 #define ENCODING_UTF32NATIVE    ENCODING_UTF32BE
   120 #define ENCODING_UCS2NATIVE     ENCODING_UCS2BE
   121 #define ENCODING_UCS4NATIVE     ENCODING_UCS4BE
   122 #else
   123 #define ENCODING_UTF16NATIVE    ENCODING_UTF16LE
   124 #define ENCODING_UTF32NATIVE    ENCODING_UTF32LE
   125 #define ENCODING_UCS2NATIVE     ENCODING_UCS2LE
   126 #define ENCODING_UCS4NATIVE     ENCODING_UCS4LE
   127 #endif
   128 
   129 struct _SDL_iconv_t
   130 {
   131     int src_fmt;
   132     int dst_fmt;
   133 };
   134 
   135 static struct
   136 {
   137     const char *name;
   138     int format;
   139 } encodings[] = {
   140 /* *INDENT-OFF* */
   141     { "ASCII", ENCODING_ASCII },
   142     { "US-ASCII", ENCODING_ASCII },
   143     { "8859-1", ENCODING_LATIN1 },
   144     { "ISO-8859-1", ENCODING_LATIN1 },
   145     { "UTF8", ENCODING_UTF8 },
   146     { "UTF-8", ENCODING_UTF8 },
   147     { "UTF16", ENCODING_UTF16 },
   148     { "UTF-16", ENCODING_UTF16 },
   149     { "UTF16BE", ENCODING_UTF16BE },
   150     { "UTF-16BE", ENCODING_UTF16BE },
   151     { "UTF16LE", ENCODING_UTF16LE },
   152     { "UTF-16LE", ENCODING_UTF16LE },
   153     { "UTF32", ENCODING_UTF32 },
   154     { "UTF-32", ENCODING_UTF32 },
   155     { "UTF32BE", ENCODING_UTF32BE },
   156     { "UTF-32BE", ENCODING_UTF32BE },
   157     { "UTF32LE", ENCODING_UTF32LE },
   158     { "UTF-32LE", ENCODING_UTF32LE },
   159     { "UCS2", ENCODING_UCS2BE },
   160     { "UCS-2", ENCODING_UCS2BE },
   161     { "UCS-2LE", ENCODING_UCS2LE },
   162     { "UCS-2BE", ENCODING_UCS2BE },
   163     { "UCS-2-INTERNAL", ENCODING_UCS2NATIVE },
   164     { "UCS4", ENCODING_UCS4BE },
   165     { "UCS-4", ENCODING_UCS4BE },
   166     { "UCS-4LE", ENCODING_UCS4LE },
   167     { "UCS-4BE", ENCODING_UCS4BE },
   168     { "UCS-4-INTERNAL", ENCODING_UCS4NATIVE },
   169 /* *INDENT-ON* */
   170 };
   171 
   172 static const char *
   173 getlocale(char *buffer, size_t bufsize)
   174 {
   175     const char *lang;
   176     char *ptr;
   177 
   178     lang = SDL_getenv("LC_ALL");
   179     if (!lang) {
   180         lang = SDL_getenv("LC_CTYPE");
   181     }
   182     if (!lang) {
   183         lang = SDL_getenv("LC_MESSAGES");
   184     }
   185     if (!lang) {
   186         lang = SDL_getenv("LANG");
   187     }
   188     if (!lang || !*lang || SDL_strcmp(lang, "C") == 0) {
   189         lang = "ASCII";
   190     }
   191 
   192     /* We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8" */
   193     ptr = SDL_strchr(lang, '.');
   194     if (ptr != NULL) {
   195         lang = ptr + 1;
   196     }
   197 
   198     SDL_strlcpy(buffer, lang, bufsize);
   199     ptr = SDL_strchr(buffer, '@');
   200     if (ptr != NULL) {
   201         *ptr = '\0';            /* chop end of string. */
   202     }
   203 
   204     return buffer;
   205 }
   206 
   207 SDL_iconv_t
   208 SDL_iconv_open(const char *tocode, const char *fromcode)
   209 {
   210     int src_fmt = ENCODING_UNKNOWN;
   211     int dst_fmt = ENCODING_UNKNOWN;
   212     int i;
   213     char fromcode_buffer[64];
   214     char tocode_buffer[64];
   215 
   216     if (!fromcode || !*fromcode) {
   217         fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
   218     }
   219     if (!tocode || !*tocode) {
   220         tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
   221     }
   222     for (i = 0; i < SDL_arraysize(encodings); ++i) {
   223         if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) {
   224             src_fmt = encodings[i].format;
   225             if (dst_fmt != ENCODING_UNKNOWN) {
   226                 break;
   227             }
   228         }
   229         if (SDL_strcasecmp(tocode, encodings[i].name) == 0) {
   230             dst_fmt = encodings[i].format;
   231             if (src_fmt != ENCODING_UNKNOWN) {
   232                 break;
   233             }
   234         }
   235     }
   236     if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) {
   237         SDL_iconv_t cd = (SDL_iconv_t) SDL_malloc(sizeof(*cd));
   238         if (cd) {
   239             cd->src_fmt = src_fmt;
   240             cd->dst_fmt = dst_fmt;
   241             return cd;
   242         }
   243     }
   244     return (SDL_iconv_t) - 1;
   245 }
   246 
   247 size_t
   248 SDL_iconv(SDL_iconv_t cd,
   249           const char **inbuf, size_t * inbytesleft,
   250           char **outbuf, size_t * outbytesleft)
   251 {
   252     /* For simplicity, we'll convert everything to and from UCS-4 */
   253     const char *src;
   254     char *dst;
   255     size_t srclen, dstlen;
   256     Uint32 ch = 0;
   257     size_t total;
   258 
   259     if (!inbuf || !*inbuf) {
   260         /* Reset the context */
   261         return 0;
   262     }
   263     if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
   264         return SDL_ICONV_E2BIG;
   265     }
   266     src = *inbuf;
   267     srclen = (inbytesleft ? *inbytesleft : 0);
   268     dst = *outbuf;
   269     dstlen = *outbytesleft;
   270 
   271     switch (cd->src_fmt) {
   272     case ENCODING_UTF16:
   273         /* Scan for a byte order marker */
   274         {
   275             Uint8 *p = (Uint8 *) src;
   276             size_t n = srclen / 2;
   277             while (n) {
   278                 if (p[0] == 0xFF && p[1] == 0xFE) {
   279                     cd->src_fmt = ENCODING_UTF16BE;
   280                     break;
   281                 } else if (p[0] == 0xFE && p[1] == 0xFF) {
   282                     cd->src_fmt = ENCODING_UTF16LE;
   283                     break;
   284                 }
   285                 p += 2;
   286                 --n;
   287             }
   288             if (n == 0) {
   289                 /* We can't tell, default to host order */
   290                 cd->src_fmt = ENCODING_UTF16NATIVE;
   291             }
   292         }
   293         break;
   294     case ENCODING_UTF32:
   295         /* Scan for a byte order marker */
   296         {
   297             Uint8 *p = (Uint8 *) src;
   298             size_t n = srclen / 4;
   299             while (n) {
   300                 if (p[0] == 0xFF && p[1] == 0xFE &&
   301                     p[2] == 0x00 && p[3] == 0x00) {
   302                     cd->src_fmt = ENCODING_UTF32BE;
   303                     break;
   304                 } else if (p[0] == 0x00 && p[1] == 0x00 &&
   305                            p[2] == 0xFE && p[3] == 0xFF) {
   306                     cd->src_fmt = ENCODING_UTF32LE;
   307                     break;
   308                 }
   309                 p += 4;
   310                 --n;
   311             }
   312             if (n == 0) {
   313                 /* We can't tell, default to host order */
   314                 cd->src_fmt = ENCODING_UTF32NATIVE;
   315             }
   316         }
   317         break;
   318     }
   319 
   320     switch (cd->dst_fmt) {
   321     case ENCODING_UTF16:
   322         /* Default to host order, need to add byte order marker */
   323         if (dstlen < 2) {
   324             return SDL_ICONV_E2BIG;
   325         }
   326         *(Uint16 *) dst = UNICODE_BOM;
   327         dst += 2;
   328         dstlen -= 2;
   329         cd->dst_fmt = ENCODING_UTF16NATIVE;
   330         break;
   331     case ENCODING_UTF32:
   332         /* Default to host order, need to add byte order marker */
   333         if (dstlen < 4) {
   334             return SDL_ICONV_E2BIG;
   335         }
   336         *(Uint32 *) dst = UNICODE_BOM;
   337         dst += 4;
   338         dstlen -= 4;
   339         cd->dst_fmt = ENCODING_UTF32NATIVE;
   340         break;
   341     }
   342 
   343     total = 0;
   344     while (srclen > 0) {
   345         /* Decode a character */
   346         switch (cd->src_fmt) {
   347         case ENCODING_ASCII:
   348             {
   349                 Uint8 *p = (Uint8 *) src;
   350                 ch = (Uint32) (p[0] & 0x7F);
   351                 ++src;
   352                 --srclen;
   353             }
   354             break;
   355         case ENCODING_LATIN1:
   356             {
   357                 Uint8 *p = (Uint8 *) src;
   358                 ch = (Uint32) p[0];
   359                 ++src;
   360                 --srclen;
   361             }
   362             break;
   363         case ENCODING_UTF8:    /* RFC 3629 */
   364             {
   365                 Uint8 *p = (Uint8 *) src;
   366                 size_t left = 0;
   367                 SDL_bool overlong = SDL_FALSE;
   368                 if (p[0] >= 0xFC) {
   369                     if ((p[0] & 0xFE) != 0xFC) {
   370                         /* Skip illegal sequences
   371                            return SDL_ICONV_EILSEQ;
   372                          */
   373                         ch = UNKNOWN_UNICODE;
   374                     } else {
   375                         if (p[0] == 0xFC && srclen > 1 && (p[1] & 0xFC) == 0x80) {
   376                             overlong = SDL_TRUE;
   377                         }
   378                         ch = (Uint32) (p[0] & 0x01);
   379                         left = 5;
   380                     }
   381                 } else if (p[0] >= 0xF8) {
   382                     if ((p[0] & 0xFC) != 0xF8) {
   383                         /* Skip illegal sequences
   384                            return SDL_ICONV_EILSEQ;
   385                          */
   386                         ch = UNKNOWN_UNICODE;
   387                     } else {
   388                         if (p[0] == 0xF8 && srclen > 1 && (p[1] & 0xF8) == 0x80) {
   389                             overlong = SDL_TRUE;
   390                         }
   391                         ch = (Uint32) (p[0] & 0x03);
   392                         left = 4;
   393                     }
   394                 } else if (p[0] >= 0xF0) {
   395                     if ((p[0] & 0xF8) != 0xF0) {
   396                         /* Skip illegal sequences
   397                            return SDL_ICONV_EILSEQ;
   398                          */
   399                         ch = UNKNOWN_UNICODE;
   400                     } else {
   401                         if (p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80) {
   402                             overlong = SDL_TRUE;
   403                         }
   404                         ch = (Uint32) (p[0] & 0x07);
   405                         left = 3;
   406                     }
   407                 } else if (p[0] >= 0xE0) {
   408                     if ((p[0] & 0xF0) != 0xE0) {
   409                         /* Skip illegal sequences
   410                            return SDL_ICONV_EILSEQ;
   411                          */
   412                         ch = UNKNOWN_UNICODE;
   413                     } else {
   414                         if (p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80) {
   415                             overlong = SDL_TRUE;
   416                         }
   417                         ch = (Uint32) (p[0] & 0x0F);
   418                         left = 2;
   419                     }
   420                 } else if (p[0] >= 0xC0) {
   421                     if ((p[0] & 0xE0) != 0xC0) {
   422                         /* Skip illegal sequences
   423                            return SDL_ICONV_EILSEQ;
   424                          */
   425                         ch = UNKNOWN_UNICODE;
   426                     } else {
   427                         if ((p[0] & 0xDE) == 0xC0) {
   428                             overlong = SDL_TRUE;
   429                         }
   430                         ch = (Uint32) (p[0] & 0x1F);
   431                         left = 1;
   432                     }
   433                 } else {
   434                     if ((p[0] & 0x80) != 0x00) {
   435                         /* Skip illegal sequences
   436                            return SDL_ICONV_EILSEQ;
   437                          */
   438                         ch = UNKNOWN_UNICODE;
   439                     } else {
   440                         ch = (Uint32) p[0];
   441                     }
   442                 }
   443                 ++src;
   444                 --srclen;
   445                 if (srclen < left) {
   446                     return SDL_ICONV_EINVAL;
   447                 }
   448                 while (left--) {
   449                     ++p;
   450                     if ((p[0] & 0xC0) != 0x80) {
   451                         /* Skip illegal sequences
   452                            return SDL_ICONV_EILSEQ;
   453                          */
   454                         ch = UNKNOWN_UNICODE;
   455                         break;
   456                     }
   457                     ch <<= 6;
   458                     ch |= (p[0] & 0x3F);
   459                     ++src;
   460                     --srclen;
   461                 }
   462                 if (overlong) {
   463                     /* Potential security risk
   464                        return SDL_ICONV_EILSEQ;
   465                      */
   466                     ch = UNKNOWN_UNICODE;
   467                 }
   468                 if ((ch >= 0xD800 && ch <= 0xDFFF) ||
   469                     (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
   470                     /* Skip illegal sequences
   471                        return SDL_ICONV_EILSEQ;
   472                      */
   473                     ch = UNKNOWN_UNICODE;
   474                 }
   475             }
   476             break;
   477         case ENCODING_UTF16BE: /* RFC 2781 */
   478             {
   479                 Uint8 *p = (Uint8 *) src;
   480                 Uint16 W1, W2;
   481                 if (srclen < 2) {
   482                     return SDL_ICONV_EINVAL;
   483                 }
   484                 W1 = ((Uint16) p[0] << 8) | (Uint16) p[1];
   485                 src += 2;
   486                 srclen -= 2;
   487                 if (W1 < 0xD800 || W1 > 0xDFFF) {
   488                     ch = (Uint32) W1;
   489                     break;
   490                 }
   491                 if (W1 > 0xDBFF) {
   492                     /* Skip illegal sequences
   493                        return SDL_ICONV_EILSEQ;
   494                      */
   495                     ch = UNKNOWN_UNICODE;
   496                     break;
   497                 }
   498                 if (srclen < 2) {
   499                     return SDL_ICONV_EINVAL;
   500                 }
   501                 p = (Uint8 *) src;
   502                 W2 = ((Uint16) p[0] << 8) | (Uint16) p[1];
   503                 src += 2;
   504                 srclen -= 2;
   505                 if (W2 < 0xDC00 || W2 > 0xDFFF) {
   506                     /* Skip illegal sequences
   507                        return SDL_ICONV_EILSEQ;
   508                      */
   509                     ch = UNKNOWN_UNICODE;
   510                     break;
   511                 }
   512                 ch = (((Uint32) (W1 & 0x3FF) << 10) |
   513                       (Uint32) (W2 & 0x3FF)) + 0x10000;
   514             }
   515             break;
   516         case ENCODING_UTF16LE: /* RFC 2781 */
   517             {
   518                 Uint8 *p = (Uint8 *) src;
   519                 Uint16 W1, W2;
   520                 if (srclen < 2) {
   521                     return SDL_ICONV_EINVAL;
   522                 }
   523                 W1 = ((Uint16) p[1] << 8) | (Uint16) p[0];
   524                 src += 2;
   525                 srclen -= 2;
   526                 if (W1 < 0xD800 || W1 > 0xDFFF) {
   527                     ch = (Uint32) W1;
   528                     break;
   529                 }
   530                 if (W1 > 0xDBFF) {
   531                     /* Skip illegal sequences
   532                        return SDL_ICONV_EILSEQ;
   533                      */
   534                     ch = UNKNOWN_UNICODE;
   535                     break;
   536                 }
   537                 if (srclen < 2) {
   538                     return SDL_ICONV_EINVAL;
   539                 }
   540                 p = (Uint8 *) src;
   541                 W2 = ((Uint16) p[1] << 8) | (Uint16) p[0];
   542                 src += 2;
   543                 srclen -= 2;
   544                 if (W2 < 0xDC00 || W2 > 0xDFFF) {
   545                     /* Skip illegal sequences
   546                        return SDL_ICONV_EILSEQ;
   547                      */
   548                     ch = UNKNOWN_UNICODE;
   549                     break;
   550                 }
   551                 ch = (((Uint32) (W1 & 0x3FF) << 10) |
   552                       (Uint32) (W2 & 0x3FF)) + 0x10000;
   553             }
   554             break;
   555         case ENCODING_UCS2LE:
   556             {
   557                 Uint8 *p = (Uint8 *) src;
   558                 if (srclen < 2) {
   559                     return SDL_ICONV_EINVAL;
   560                 }
   561                 ch = ((Uint32) p[1] << 8) | (Uint32) p[0];
   562                 src += 2;
   563                 srclen -= 2;
   564             }
   565             break;
   566         case ENCODING_UCS2BE:
   567             {
   568                 Uint8 *p = (Uint8 *) src;
   569                 if (srclen < 2) {
   570                     return SDL_ICONV_EINVAL;
   571                 }
   572                 ch = ((Uint32) p[0] << 8) | (Uint32) p[1];
   573                 src += 2;
   574                 srclen -= 2;
   575             }
   576             break;
   577         case ENCODING_UCS4BE:
   578         case ENCODING_UTF32BE:
   579             {
   580                 Uint8 *p = (Uint8 *) src;
   581                 if (srclen < 4) {
   582                     return SDL_ICONV_EINVAL;
   583                 }
   584                 ch = ((Uint32) p[0] << 24) |
   585                     ((Uint32) p[1] << 16) |
   586                     ((Uint32) p[2] << 8) | (Uint32) p[3];
   587                 src += 4;
   588                 srclen -= 4;
   589             }
   590             break;
   591         case ENCODING_UCS4LE:
   592         case ENCODING_UTF32LE:
   593             {
   594                 Uint8 *p = (Uint8 *) src;
   595                 if (srclen < 4) {
   596                     return SDL_ICONV_EINVAL;
   597                 }
   598                 ch = ((Uint32) p[3] << 24) |
   599                     ((Uint32) p[2] << 16) |
   600                     ((Uint32) p[1] << 8) | (Uint32) p[0];
   601                 src += 4;
   602                 srclen -= 4;
   603             }
   604             break;
   605         }
   606 
   607         /* Encode a character */
   608         switch (cd->dst_fmt) {
   609         case ENCODING_ASCII:
   610             {
   611                 Uint8 *p = (Uint8 *) dst;
   612                 if (dstlen < 1) {
   613                     return SDL_ICONV_E2BIG;
   614                 }
   615                 if (ch > 0x7F) {
   616                     *p = UNKNOWN_ASCII;
   617                 } else {
   618                     *p = (Uint8) ch;
   619                 }
   620                 ++dst;
   621                 --dstlen;
   622             }
   623             break;
   624         case ENCODING_LATIN1:
   625             {
   626                 Uint8 *p = (Uint8 *) dst;
   627                 if (dstlen < 1) {
   628                     return SDL_ICONV_E2BIG;
   629                 }
   630                 if (ch > 0xFF) {
   631                     *p = UNKNOWN_ASCII;
   632                 } else {
   633                     *p = (Uint8) ch;
   634                 }
   635                 ++dst;
   636                 --dstlen;
   637             }
   638             break;
   639         case ENCODING_UTF8:    /* RFC 3629 */
   640             {
   641                 Uint8 *p = (Uint8 *) dst;
   642                 if (ch > 0x10FFFF) {
   643                     ch = UNKNOWN_UNICODE;
   644                 }
   645                 if (ch <= 0x7F) {
   646                     if (dstlen < 1) {
   647                         return SDL_ICONV_E2BIG;
   648                     }
   649                     *p = (Uint8) ch;
   650                     ++dst;
   651                     --dstlen;
   652                 } else if (ch <= 0x7FF) {
   653                     if (dstlen < 2) {
   654                         return SDL_ICONV_E2BIG;
   655                     }
   656                     p[0] = 0xC0 | (Uint8) ((ch >> 6) & 0x1F);
   657                     p[1] = 0x80 | (Uint8) (ch & 0x3F);
   658                     dst += 2;
   659                     dstlen -= 2;
   660                 } else if (ch <= 0xFFFF) {
   661                     if (dstlen < 3) {
   662                         return SDL_ICONV_E2BIG;
   663                     }
   664                     p[0] = 0xE0 | (Uint8) ((ch >> 12) & 0x0F);
   665                     p[1] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   666                     p[2] = 0x80 | (Uint8) (ch & 0x3F);
   667                     dst += 3;
   668                     dstlen -= 3;
   669                 } else if (ch <= 0x1FFFFF) {
   670                     if (dstlen < 4) {
   671                         return SDL_ICONV_E2BIG;
   672                     }
   673                     p[0] = 0xF0 | (Uint8) ((ch >> 18) & 0x07);
   674                     p[1] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   675                     p[2] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   676                     p[3] = 0x80 | (Uint8) (ch & 0x3F);
   677                     dst += 4;
   678                     dstlen -= 4;
   679                 } else if (ch <= 0x3FFFFFF) {
   680                     if (dstlen < 5) {
   681                         return SDL_ICONV_E2BIG;
   682                     }
   683                     p[0] = 0xF8 | (Uint8) ((ch >> 24) & 0x03);
   684                     p[1] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
   685                     p[2] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   686                     p[3] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   687                     p[4] = 0x80 | (Uint8) (ch & 0x3F);
   688                     dst += 5;
   689                     dstlen -= 5;
   690                 } else {
   691                     if (dstlen < 6) {
   692                         return SDL_ICONV_E2BIG;
   693                     }
   694                     p[0] = 0xFC | (Uint8) ((ch >> 30) & 0x01);
   695                     p[1] = 0x80 | (Uint8) ((ch >> 24) & 0x3F);
   696                     p[2] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
   697                     p[3] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   698                     p[4] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   699                     p[5] = 0x80 | (Uint8) (ch & 0x3F);
   700                     dst += 6;
   701                     dstlen -= 6;
   702                 }
   703             }
   704             break;
   705         case ENCODING_UTF16BE: /* RFC 2781 */
   706             {
   707                 Uint8 *p = (Uint8 *) dst;
   708                 if (ch > 0x10FFFF) {
   709                     ch = UNKNOWN_UNICODE;
   710                 }
   711                 if (ch < 0x10000) {
   712                     if (dstlen < 2) {
   713                         return SDL_ICONV_E2BIG;
   714                     }
   715                     p[0] = (Uint8) (ch >> 8);
   716                     p[1] = (Uint8) ch;
   717                     dst += 2;
   718                     dstlen -= 2;
   719                 } else {
   720                     Uint16 W1, W2;
   721                     if (dstlen < 4) {
   722                         return SDL_ICONV_E2BIG;
   723                     }
   724                     ch = ch - 0x10000;
   725                     W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
   726                     W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
   727                     p[0] = (Uint8) (W1 >> 8);
   728                     p[1] = (Uint8) W1;
   729                     p[2] = (Uint8) (W2 >> 8);
   730                     p[3] = (Uint8) W2;
   731                     dst += 4;
   732                     dstlen -= 4;
   733                 }
   734             }
   735             break;
   736         case ENCODING_UTF16LE: /* RFC 2781 */
   737             {
   738                 Uint8 *p = (Uint8 *) dst;
   739                 if (ch > 0x10FFFF) {
   740                     ch = UNKNOWN_UNICODE;
   741                 }
   742                 if (ch < 0x10000) {
   743                     if (dstlen < 2) {
   744                         return SDL_ICONV_E2BIG;
   745                     }
   746                     p[1] = (Uint8) (ch >> 8);
   747                     p[0] = (Uint8) ch;
   748                     dst += 2;
   749                     dstlen -= 2;
   750                 } else {
   751                     Uint16 W1, W2;
   752                     if (dstlen < 4) {
   753                         return SDL_ICONV_E2BIG;
   754                     }
   755                     ch = ch - 0x10000;
   756                     W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
   757                     W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
   758                     p[1] = (Uint8) (W1 >> 8);
   759                     p[0] = (Uint8) W1;
   760                     p[3] = (Uint8) (W2 >> 8);
   761                     p[2] = (Uint8) W2;
   762                     dst += 4;
   763                     dstlen -= 4;
   764                 }
   765             }
   766             break;
   767         case ENCODING_UCS2BE:
   768             {
   769                 Uint8 *p = (Uint8 *) dst;
   770                 if (ch > 0xFFFF) {
   771                     ch = UNKNOWN_UNICODE;
   772                 }
   773                 if (dstlen < 2) {
   774                     return SDL_ICONV_E2BIG;
   775                 }
   776                 p[0] = (Uint8) (ch >> 8);
   777                 p[1] = (Uint8) ch;
   778                 dst += 2;
   779                 dstlen -= 2;
   780             }
   781             break;
   782         case ENCODING_UCS2LE:
   783             {
   784                 Uint8 *p = (Uint8 *) dst;
   785                 if (ch > 0xFFFF) {
   786                     ch = UNKNOWN_UNICODE;
   787                 }
   788                 if (dstlen < 2) {
   789                     return SDL_ICONV_E2BIG;
   790                 }
   791                 p[1] = (Uint8) (ch >> 8);
   792                 p[0] = (Uint8) ch;
   793                 dst += 2;
   794                 dstlen -= 2;
   795             }
   796             break;
   797         case ENCODING_UTF32BE:
   798             if (ch > 0x10FFFF) {
   799                 ch = UNKNOWN_UNICODE;
   800             }
   801             /* fallthrough */
   802         case ENCODING_UCS4BE:
   803             if (ch > 0x7FFFFFFF) {
   804                 ch = UNKNOWN_UNICODE;
   805             }
   806             {
   807                 Uint8 *p = (Uint8 *) dst;
   808                 if (dstlen < 4) {
   809                     return SDL_ICONV_E2BIG;
   810                 }
   811                 p[0] = (Uint8) (ch >> 24);
   812                 p[1] = (Uint8) (ch >> 16);
   813                 p[2] = (Uint8) (ch >> 8);
   814                 p[3] = (Uint8) ch;
   815                 dst += 4;
   816                 dstlen -= 4;
   817             }
   818             break;
   819         case ENCODING_UTF32LE:
   820             if (ch > 0x10FFFF) {
   821                 ch = UNKNOWN_UNICODE;
   822             }
   823             /* fallthrough */
   824         case ENCODING_UCS4LE:
   825             if (ch > 0x7FFFFFFF) {
   826                 ch = UNKNOWN_UNICODE;
   827             }
   828             {
   829                 Uint8 *p = (Uint8 *) dst;
   830                 if (dstlen < 4) {
   831                     return SDL_ICONV_E2BIG;
   832                 }
   833                 p[3] = (Uint8) (ch >> 24);
   834                 p[2] = (Uint8) (ch >> 16);
   835                 p[1] = (Uint8) (ch >> 8);
   836                 p[0] = (Uint8) ch;
   837                 dst += 4;
   838                 dstlen -= 4;
   839             }
   840             break;
   841         }
   842 
   843         /* Update state */
   844         *inbuf = src;
   845         *inbytesleft = srclen;
   846         *outbuf = dst;
   847         *outbytesleft = dstlen;
   848         ++total;
   849     }
   850     return total;
   851 }
   852 
   853 int
   854 SDL_iconv_close(SDL_iconv_t cd)
   855 {
   856     if (cd != (SDL_iconv_t)-1) {
   857         SDL_free(cd);
   858     }
   859     return 0;
   860 }
   861 
   862 #endif /* !HAVE_ICONV */
   863 
   864 char *
   865 SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf,
   866                  size_t inbytesleft)
   867 {
   868     SDL_iconv_t cd;
   869     char *string;
   870     size_t stringsize;
   871     char *outbuf;
   872     size_t outbytesleft;
   873     size_t retCode = 0;
   874 
   875     cd = SDL_iconv_open(tocode, fromcode);
   876     if (cd == (SDL_iconv_t) - 1) {
   877         /* See if we can recover here (fixes iconv on Solaris 11) */
   878         if (!tocode || !*tocode) {
   879             tocode = "UTF-8";
   880         }
   881         if (!fromcode || !*fromcode) {
   882             fromcode = "UTF-8";
   883         }
   884         cd = SDL_iconv_open(tocode, fromcode);
   885     }
   886     if (cd == (SDL_iconv_t) - 1) {
   887         return NULL;
   888     }
   889 
   890     stringsize = inbytesleft > 4 ? inbytesleft : 4;
   891     string = (char *) SDL_malloc(stringsize);
   892     if (!string) {
   893         SDL_iconv_close(cd);
   894         return NULL;
   895     }
   896     outbuf = string;
   897     outbytesleft = stringsize;
   898     SDL_memset(outbuf, 0, 4);
   899 
   900     while (inbytesleft > 0) {
   901         retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
   902         switch (retCode) {
   903         case SDL_ICONV_E2BIG:
   904             {
   905                 char *oldstring = string;
   906                 stringsize *= 2;
   907                 string = (char *) SDL_realloc(string, stringsize);
   908                 if (!string) {
   909                     SDL_iconv_close(cd);
   910                     return NULL;
   911                 }
   912                 outbuf = string + (outbuf - oldstring);
   913                 outbytesleft = stringsize - (outbuf - string);
   914                 SDL_memset(outbuf, 0, 4);
   915             }
   916             break;
   917         case SDL_ICONV_EILSEQ:
   918             /* Try skipping some input data - not perfect, but... */
   919             ++inbuf;
   920             --inbytesleft;
   921             break;
   922         case SDL_ICONV_EINVAL:
   923         case SDL_ICONV_ERROR:
   924             /* We can't continue... */
   925             inbytesleft = 0;
   926             break;
   927         }
   928     }
   929     SDL_iconv_close(cd);
   930 
   931     return string;
   932 }
   933 
   934 /* vi: set ts=4 sw=4 expandtab: */