src/stdlib/SDL_iconv.c
author Sam Lantinga <slouken@libsdl.org>
Sun, 28 Oct 2012 13:03:45 -0700
changeset 6610 4032f8efdfe7
parent 6138 4c64952a58fb
child 6885 700f1b25f77f
permissions -rw-r--r--
It turns out that UCS2 and UCS4 are defined as big-endian encodings
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2012 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "SDL_config.h"
    22 
    23 /* This file contains portable iconv functions for SDL */
    24 
    25 #include "SDL_stdinc.h"
    26 #include "SDL_endian.h"
    27 
    28 #ifdef HAVE_ICONV
    29 
    30 /* Depending on which standard the iconv() was implemented with,
    31    iconv() may or may not use const char ** for the inbuf param.
    32    If we get this wrong, it's just a warning, so no big deal.
    33 */
    34 #if defined(_XGP6) || defined(__APPLE__) || \
    35     (defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2)))
    36 #define ICONV_INBUF_NONCONST
    37 #endif
    38 
    39 #include <errno.h>
    40 
    41 size_t
    42 SDL_iconv(SDL_iconv_t cd,
    43           const char **inbuf, size_t * inbytesleft,
    44           char **outbuf, size_t * outbytesleft)
    45 {
    46     size_t retCode;
    47 #ifdef ICONV_INBUF_NONCONST
    48     retCode = iconv(cd, (char **) inbuf, inbytesleft, outbuf, outbytesleft);
    49 #else
    50     retCode = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
    51 #endif
    52     if (retCode == (size_t) - 1) {
    53         switch (errno) {
    54         case E2BIG:
    55             return SDL_ICONV_E2BIG;
    56         case EILSEQ:
    57             return SDL_ICONV_EILSEQ;
    58         case EINVAL:
    59             return SDL_ICONV_EINVAL;
    60         default:
    61             return SDL_ICONV_ERROR;
    62         }
    63     }
    64     return retCode;
    65 }
    66 
    67 #else
    68 
    69 /* Lots of useful information on Unicode at:
    70 	http://www.cl.cam.ac.uk/~mgk25/unicode.html
    71 */
    72 
    73 #define UNICODE_BOM	0xFEFF
    74 
    75 #define UNKNOWN_ASCII	'?'
    76 #define UNKNOWN_UNICODE	0xFFFD
    77 
    78 enum
    79 {
    80     ENCODING_UNKNOWN,
    81     ENCODING_ASCII,
    82     ENCODING_LATIN1,
    83     ENCODING_UTF8,
    84     ENCODING_UTF16,             /* Needs byte order marker */
    85     ENCODING_UTF16BE,
    86     ENCODING_UTF16LE,
    87     ENCODING_UTF32,             /* Needs byte order marker */
    88     ENCODING_UTF32BE,
    89     ENCODING_UTF32LE,
    90     ENCODING_UCS2BE,
    91     ENCODING_UCS2LE,
    92     ENCODING_UCS4BE,
    93     ENCODING_UCS4LE,
    94 };
    95 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
    96 #define ENCODING_UTF16NATIVE	ENCODING_UTF16BE
    97 #define ENCODING_UTF32NATIVE	ENCODING_UTF32BE
    98 #define ENCODING_UCS2NATIVE     ENCODING_UCS2BE
    99 #define ENCODING_UCS4NATIVE     ENCODING_UCS4BE
   100 #else
   101 #define ENCODING_UTF16NATIVE	ENCODING_UTF16LE
   102 #define ENCODING_UTF32NATIVE	ENCODING_UTF32LE
   103 #define ENCODING_UCS2NATIVE     ENCODING_UCS2LE
   104 #define ENCODING_UCS4NATIVE     ENCODING_UCS4LE
   105 #endif
   106 
   107 struct _SDL_iconv_t
   108 {
   109     int src_fmt;
   110     int dst_fmt;
   111 };
   112 
   113 static struct
   114 {
   115     const char *name;
   116     int format;
   117 } encodings[] = {
   118 /* *INDENT-OFF* */
   119     { "ASCII", ENCODING_ASCII },
   120     { "US-ASCII", ENCODING_ASCII },
   121     { "8859-1", ENCODING_LATIN1 },
   122     { "ISO-8859-1", ENCODING_LATIN1 },
   123     { "UTF8", ENCODING_UTF8 },
   124     { "UTF-8", ENCODING_UTF8 },
   125     { "UTF16", ENCODING_UTF16 },
   126     { "UTF-16", ENCODING_UTF16 },
   127     { "UTF16BE", ENCODING_UTF16BE },
   128     { "UTF-16BE", ENCODING_UTF16BE },
   129     { "UTF16LE", ENCODING_UTF16LE },
   130     { "UTF-16LE", ENCODING_UTF16LE },
   131     { "UTF32", ENCODING_UTF32 },
   132     { "UTF-32", ENCODING_UTF32 },
   133     { "UTF32BE", ENCODING_UTF32BE },
   134     { "UTF-32BE", ENCODING_UTF32BE },
   135     { "UTF32LE", ENCODING_UTF32LE },
   136     { "UTF-32LE", ENCODING_UTF32LE },
   137     { "UCS2", ENCODING_UCS2BE },
   138     { "UCS-2", ENCODING_UCS2BE },
   139     { "UCS-2LE", ENCODING_UCS2LE },
   140     { "UCS-2BE", ENCODING_UCS2BE },
   141     { "UCS-2-INTERNAL", ENCODING_UCS2NATIVE },
   142     { "UCS4", ENCODING_UCS4BE },
   143     { "UCS-4", ENCODING_UCS4BE },
   144     { "UCS-4LE", ENCODING_UCS4LE },
   145     { "UCS-4BE", ENCODING_UCS4BE },
   146     { "UCS-4-INTERNAL", ENCODING_UCS4NATIVE },
   147 /* *INDENT-ON* */
   148 };
   149 
   150 static const char *
   151 getlocale(char *buffer, size_t bufsize)
   152 {
   153     const char *lang;
   154     char *ptr;
   155 
   156     lang = SDL_getenv("LC_ALL");
   157     if (!lang) {
   158         lang = SDL_getenv("LC_CTYPE");
   159     }
   160     if (!lang) {
   161         lang = SDL_getenv("LC_MESSAGES");
   162     }
   163     if (!lang) {
   164         lang = SDL_getenv("LANG");
   165     }
   166     if (!lang || !*lang || SDL_strcmp(lang, "C") == 0) {
   167         lang = "ASCII";
   168     }
   169 
   170     /* We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8" */
   171     ptr = SDL_strchr(lang, '.');
   172     if (ptr != NULL) {
   173         lang = ptr + 1;
   174     }
   175 
   176     SDL_strlcpy(buffer, lang, bufsize);
   177     ptr = SDL_strchr(buffer, '@');
   178     if (ptr != NULL) {
   179         *ptr = '\0';            /* chop end of string. */
   180     }
   181 
   182     return buffer;
   183 }
   184 
   185 SDL_iconv_t
   186 SDL_iconv_open(const char *tocode, const char *fromcode)
   187 {
   188     int src_fmt = ENCODING_UNKNOWN;
   189     int dst_fmt = ENCODING_UNKNOWN;
   190     int i;
   191     char fromcode_buffer[64];
   192     char tocode_buffer[64];
   193 
   194     if (!fromcode || !*fromcode) {
   195         fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
   196     }
   197     if (!tocode || !*tocode) {
   198         tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
   199     }
   200     for (i = 0; i < SDL_arraysize(encodings); ++i) {
   201         if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) {
   202             src_fmt = encodings[i].format;
   203             if (dst_fmt != ENCODING_UNKNOWN) {
   204                 break;
   205             }
   206         }
   207         if (SDL_strcasecmp(tocode, encodings[i].name) == 0) {
   208             dst_fmt = encodings[i].format;
   209             if (src_fmt != ENCODING_UNKNOWN) {
   210                 break;
   211             }
   212         }
   213     }
   214     if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) {
   215         SDL_iconv_t cd = (SDL_iconv_t) SDL_malloc(sizeof(*cd));
   216         if (cd) {
   217             cd->src_fmt = src_fmt;
   218             cd->dst_fmt = dst_fmt;
   219             return cd;
   220         }
   221     }
   222     return (SDL_iconv_t) - 1;
   223 }
   224 
   225 size_t
   226 SDL_iconv(SDL_iconv_t cd,
   227           const char **inbuf, size_t * inbytesleft,
   228           char **outbuf, size_t * outbytesleft)
   229 {
   230     /* For simplicity, we'll convert everything to and from UCS-4 */
   231     const char *src;
   232     char *dst;
   233     size_t srclen, dstlen;
   234     Uint32 ch = 0;
   235     size_t total;
   236 
   237     if (!inbuf || !*inbuf) {
   238         /* Reset the context */
   239         return 0;
   240     }
   241     if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
   242         return SDL_ICONV_E2BIG;
   243     }
   244     src = *inbuf;
   245     srclen = (inbytesleft ? *inbytesleft : 0);
   246     dst = *outbuf;
   247     dstlen = *outbytesleft;
   248 
   249     switch (cd->src_fmt) {
   250     case ENCODING_UTF16:
   251         /* Scan for a byte order marker */
   252         {
   253             Uint8 *p = (Uint8 *) src;
   254             size_t n = srclen / 2;
   255             while (n) {
   256                 if (p[0] == 0xFF && p[1] == 0xFE) {
   257                     cd->src_fmt = ENCODING_UTF16BE;
   258                     break;
   259                 } else if (p[0] == 0xFE && p[1] == 0xFF) {
   260                     cd->src_fmt = ENCODING_UTF16LE;
   261                     break;
   262                 }
   263                 p += 2;
   264                 --n;
   265             }
   266             if (n == 0) {
   267                 /* We can't tell, default to host order */
   268                 cd->src_fmt = ENCODING_UTF16NATIVE;
   269             }
   270         }
   271         break;
   272     case ENCODING_UTF32:
   273         /* Scan for a byte order marker */
   274         {
   275             Uint8 *p = (Uint8 *) src;
   276             size_t n = srclen / 4;
   277             while (n) {
   278                 if (p[0] == 0xFF && p[1] == 0xFE &&
   279                     p[2] == 0x00 && p[3] == 0x00) {
   280                     cd->src_fmt = ENCODING_UTF32BE;
   281                     break;
   282                 } else if (p[0] == 0x00 && p[1] == 0x00 &&
   283                            p[2] == 0xFE && p[3] == 0xFF) {
   284                     cd->src_fmt = ENCODING_UTF32LE;
   285                     break;
   286                 }
   287                 p += 4;
   288                 --n;
   289             }
   290             if (n == 0) {
   291                 /* We can't tell, default to host order */
   292                 cd->src_fmt = ENCODING_UTF32NATIVE;
   293             }
   294         }
   295         break;
   296     }
   297 
   298     switch (cd->dst_fmt) {
   299     case ENCODING_UTF16:
   300         /* Default to host order, need to add byte order marker */
   301         if (dstlen < 2) {
   302             return SDL_ICONV_E2BIG;
   303         }
   304         *(Uint16 *) dst = UNICODE_BOM;
   305         dst += 2;
   306         dstlen -= 2;
   307         cd->dst_fmt = ENCODING_UTF16NATIVE;
   308         break;
   309     case ENCODING_UTF32:
   310         /* Default to host order, need to add byte order marker */
   311         if (dstlen < 4) {
   312             return SDL_ICONV_E2BIG;
   313         }
   314         *(Uint32 *) dst = UNICODE_BOM;
   315         dst += 4;
   316         dstlen -= 4;
   317         cd->dst_fmt = ENCODING_UTF32NATIVE;
   318         break;
   319     }
   320 
   321     total = 0;
   322     while (srclen > 0) {
   323         /* Decode a character */
   324         switch (cd->src_fmt) {
   325         case ENCODING_ASCII:
   326             {
   327                 Uint8 *p = (Uint8 *) src;
   328                 ch = (Uint32) (p[0] & 0x7F);
   329                 ++src;
   330                 --srclen;
   331             }
   332             break;
   333         case ENCODING_LATIN1:
   334             {
   335                 Uint8 *p = (Uint8 *) src;
   336                 ch = (Uint32) p[0];
   337                 ++src;
   338                 --srclen;
   339             }
   340             break;
   341         case ENCODING_UTF8:    /* RFC 3629 */
   342             {
   343                 Uint8 *p = (Uint8 *) src;
   344                 size_t left = 0;
   345                 SDL_bool overlong = SDL_FALSE;
   346                 if (p[0] >= 0xFC) {
   347                     if ((p[0] & 0xFE) != 0xFC) {
   348                         /* Skip illegal sequences
   349                            return SDL_ICONV_EILSEQ;
   350                          */
   351                         ch = UNKNOWN_UNICODE;
   352                     } else {
   353                         if (p[0] == 0xFC) {
   354                             overlong = SDL_TRUE;
   355                         }
   356                         ch = (Uint32) (p[0] & 0x01);
   357                         left = 5;
   358                     }
   359                 } else if (p[0] >= 0xF8) {
   360                     if ((p[0] & 0xFC) != 0xF8) {
   361                         /* Skip illegal sequences
   362                            return SDL_ICONV_EILSEQ;
   363                          */
   364                         ch = UNKNOWN_UNICODE;
   365                     } else {
   366                         if (p[0] == 0xF8) {
   367                             overlong = SDL_TRUE;
   368                         }
   369                         ch = (Uint32) (p[0] & 0x03);
   370                         left = 4;
   371                     }
   372                 } else if (p[0] >= 0xF0) {
   373                     if ((p[0] & 0xF8) != 0xF0) {
   374                         /* Skip illegal sequences
   375                            return SDL_ICONV_EILSEQ;
   376                          */
   377                         ch = UNKNOWN_UNICODE;
   378                     } else {
   379                         if (p[0] == 0xF0) {
   380                             overlong = SDL_TRUE;
   381                         }
   382                         ch = (Uint32) (p[0] & 0x07);
   383                         left = 3;
   384                     }
   385                 } else if (p[0] >= 0xE0) {
   386                     if ((p[0] & 0xF0) != 0xE0) {
   387                         /* Skip illegal sequences
   388                            return SDL_ICONV_EILSEQ;
   389                          */
   390                         ch = UNKNOWN_UNICODE;
   391                     } else {
   392                         if (p[0] == 0xE0) {
   393                             overlong = SDL_TRUE;
   394                         }
   395                         ch = (Uint32) (p[0] & 0x0F);
   396                         left = 2;
   397                     }
   398                 } else if (p[0] >= 0xC0) {
   399                     if ((p[0] & 0xE0) != 0xC0) {
   400                         /* Skip illegal sequences
   401                            return SDL_ICONV_EILSEQ;
   402                          */
   403                         ch = UNKNOWN_UNICODE;
   404                     } else {
   405                         if ((p[0] & 0xDE) == 0xC0) {
   406                             overlong = SDL_TRUE;
   407                         }
   408                         ch = (Uint32) (p[0] & 0x1F);
   409                         left = 1;
   410                     }
   411                 } else {
   412                     if ((p[0] & 0x80) != 0x00) {
   413                         /* Skip illegal sequences
   414                            return SDL_ICONV_EILSEQ;
   415                          */
   416                         ch = UNKNOWN_UNICODE;
   417                     } else {
   418                         ch = (Uint32) p[0];
   419                     }
   420                 }
   421                 ++src;
   422                 --srclen;
   423                 if (srclen < left) {
   424                     return SDL_ICONV_EINVAL;
   425                 }
   426                 while (left--) {
   427                     ++p;
   428                     if ((p[0] & 0xC0) != 0x80) {
   429                         /* Skip illegal sequences
   430                            return SDL_ICONV_EILSEQ;
   431                          */
   432                         ch = UNKNOWN_UNICODE;
   433                         break;
   434                     }
   435                     ch <<= 6;
   436                     ch |= (p[0] & 0x3F);
   437                     ++src;
   438                     --srclen;
   439                 }
   440                 if (overlong) {
   441                     /* Potential security risk
   442                        return SDL_ICONV_EILSEQ;
   443                      */
   444                     ch = UNKNOWN_UNICODE;
   445                 }
   446                 if ((ch >= 0xD800 && ch <= 0xDFFF) ||
   447                     (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
   448                     /* Skip illegal sequences
   449                        return SDL_ICONV_EILSEQ;
   450                      */
   451                     ch = UNKNOWN_UNICODE;
   452                 }
   453             }
   454             break;
   455         case ENCODING_UTF16BE: /* RFC 2781 */
   456             {
   457                 Uint8 *p = (Uint8 *) src;
   458                 Uint16 W1, W2;
   459                 if (srclen < 2) {
   460                     return SDL_ICONV_EINVAL;
   461                 }
   462                 W1 = ((Uint16) p[0] << 8) | (Uint16) p[1];
   463                 src += 2;
   464                 srclen -= 2;
   465                 if (W1 < 0xD800 || W1 > 0xDFFF) {
   466                     ch = (Uint32) W1;
   467                     break;
   468                 }
   469                 if (W1 > 0xDBFF) {
   470                     /* Skip illegal sequences
   471                        return SDL_ICONV_EILSEQ;
   472                      */
   473                     ch = UNKNOWN_UNICODE;
   474                     break;
   475                 }
   476                 if (srclen < 2) {
   477                     return SDL_ICONV_EINVAL;
   478                 }
   479                 p = (Uint8 *) src;
   480                 W2 = ((Uint16) p[0] << 8) | (Uint16) p[1];
   481                 src += 2;
   482                 srclen -= 2;
   483                 if (W2 < 0xDC00 || W2 > 0xDFFF) {
   484                     /* Skip illegal sequences
   485                        return SDL_ICONV_EILSEQ;
   486                      */
   487                     ch = UNKNOWN_UNICODE;
   488                     break;
   489                 }
   490                 ch = (((Uint32) (W1 & 0x3FF) << 10) |
   491                       (Uint32) (W2 & 0x3FF)) + 0x10000;
   492             }
   493             break;
   494         case ENCODING_UTF16LE: /* RFC 2781 */
   495             {
   496                 Uint8 *p = (Uint8 *) src;
   497                 Uint16 W1, W2;
   498                 if (srclen < 2) {
   499                     return SDL_ICONV_EINVAL;
   500                 }
   501                 W1 = ((Uint16) p[1] << 8) | (Uint16) p[0];
   502                 src += 2;
   503                 srclen -= 2;
   504                 if (W1 < 0xD800 || W1 > 0xDFFF) {
   505                     ch = (Uint32) W1;
   506                     break;
   507                 }
   508                 if (W1 > 0xDBFF) {
   509                     /* Skip illegal sequences
   510                        return SDL_ICONV_EILSEQ;
   511                      */
   512                     ch = UNKNOWN_UNICODE;
   513                     break;
   514                 }
   515                 if (srclen < 2) {
   516                     return SDL_ICONV_EINVAL;
   517                 }
   518                 p = (Uint8 *) src;
   519                 W2 = ((Uint16) p[1] << 8) | (Uint16) p[0];
   520                 src += 2;
   521                 srclen -= 2;
   522                 if (W2 < 0xDC00 || W2 > 0xDFFF) {
   523                     /* Skip illegal sequences
   524                        return SDL_ICONV_EILSEQ;
   525                      */
   526                     ch = UNKNOWN_UNICODE;
   527                     break;
   528                 }
   529                 ch = (((Uint32) (W1 & 0x3FF) << 10) |
   530                       (Uint32) (W2 & 0x3FF)) + 0x10000;
   531             }
   532             break;
   533         case ENCODING_UCS2LE:
   534             {
   535                 Uint8 *p = (Uint8 *) src;
   536                 if (srclen < 2) {
   537                     return SDL_ICONV_EINVAL;
   538                 }
   539                 ch = ((Uint32) p[1] << 8) | (Uint32) p[0];
   540                 src += 2;
   541                 srclen -= 2;
   542             }
   543             break;
   544         case ENCODING_UCS2BE:
   545             {
   546                 Uint8 *p = (Uint8 *) src;
   547                 if (srclen < 2) {
   548                     return SDL_ICONV_EINVAL;
   549                 }
   550                 ch = ((Uint32) p[0] << 8) | (Uint32) p[1];
   551                 src += 2;
   552                 srclen -= 2;
   553             }
   554             break;
   555         case ENCODING_UCS4BE:
   556         case ENCODING_UTF32BE:
   557             {
   558                 Uint8 *p = (Uint8 *) src;
   559                 if (srclen < 4) {
   560                     return SDL_ICONV_EINVAL;
   561                 }
   562                 ch = ((Uint32) p[0] << 24) |
   563                     ((Uint32) p[1] << 16) |
   564                     ((Uint32) p[2] << 8) | (Uint32) p[3];
   565                 src += 4;
   566                 srclen -= 4;
   567             }
   568             break;
   569         case ENCODING_UCS4LE:
   570         case ENCODING_UTF32LE:
   571             {
   572                 Uint8 *p = (Uint8 *) src;
   573                 if (srclen < 4) {
   574                     return SDL_ICONV_EINVAL;
   575                 }
   576                 ch = ((Uint32) p[3] << 24) |
   577                     ((Uint32) p[2] << 16) |
   578                     ((Uint32) p[1] << 8) | (Uint32) p[0];
   579                 src += 4;
   580                 srclen -= 4;
   581             }
   582             break;
   583         }
   584 
   585         /* Encode a character */
   586         switch (cd->dst_fmt) {
   587         case ENCODING_ASCII:
   588             {
   589                 Uint8 *p = (Uint8 *) dst;
   590                 if (dstlen < 1) {
   591                     return SDL_ICONV_E2BIG;
   592                 }
   593                 if (ch > 0x7F) {
   594                     *p = UNKNOWN_ASCII;
   595                 } else {
   596                     *p = (Uint8) ch;
   597                 }
   598                 ++dst;
   599                 --dstlen;
   600             }
   601             break;
   602         case ENCODING_LATIN1:
   603             {
   604                 Uint8 *p = (Uint8 *) dst;
   605                 if (dstlen < 1) {
   606                     return SDL_ICONV_E2BIG;
   607                 }
   608                 if (ch > 0xFF) {
   609                     *p = UNKNOWN_ASCII;
   610                 } else {
   611                     *p = (Uint8) ch;
   612                 }
   613                 ++dst;
   614                 --dstlen;
   615             }
   616             break;
   617         case ENCODING_UTF8:    /* RFC 3629 */
   618             {
   619                 Uint8 *p = (Uint8 *) dst;
   620                 if (ch > 0x10FFFF) {
   621                     ch = UNKNOWN_UNICODE;
   622                 }
   623                 if (ch <= 0x7F) {
   624                     if (dstlen < 1) {
   625                         return SDL_ICONV_E2BIG;
   626                     }
   627                     *p = (Uint8) ch;
   628                     ++dst;
   629                     --dstlen;
   630                 } else if (ch <= 0x7FF) {
   631                     if (dstlen < 2) {
   632                         return SDL_ICONV_E2BIG;
   633                     }
   634                     p[0] = 0xC0 | (Uint8) ((ch >> 6) & 0x1F);
   635                     p[1] = 0x80 | (Uint8) (ch & 0x3F);
   636                     dst += 2;
   637                     dstlen -= 2;
   638                 } else if (ch <= 0xFFFF) {
   639                     if (dstlen < 3) {
   640                         return SDL_ICONV_E2BIG;
   641                     }
   642                     p[0] = 0xE0 | (Uint8) ((ch >> 12) & 0x0F);
   643                     p[1] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   644                     p[2] = 0x80 | (Uint8) (ch & 0x3F);
   645                     dst += 3;
   646                     dstlen -= 3;
   647                 } else if (ch <= 0x1FFFFF) {
   648                     if (dstlen < 4) {
   649                         return SDL_ICONV_E2BIG;
   650                     }
   651                     p[0] = 0xF0 | (Uint8) ((ch >> 18) & 0x07);
   652                     p[1] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   653                     p[2] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   654                     p[3] = 0x80 | (Uint8) (ch & 0x3F);
   655                     dst += 4;
   656                     dstlen -= 4;
   657                 } else if (ch <= 0x3FFFFFF) {
   658                     if (dstlen < 5) {
   659                         return SDL_ICONV_E2BIG;
   660                     }
   661                     p[0] = 0xF8 | (Uint8) ((ch >> 24) & 0x03);
   662                     p[1] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
   663                     p[2] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   664                     p[3] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   665                     p[4] = 0x80 | (Uint8) (ch & 0x3F);
   666                     dst += 5;
   667                     dstlen -= 5;
   668                 } else {
   669                     if (dstlen < 6) {
   670                         return SDL_ICONV_E2BIG;
   671                     }
   672                     p[0] = 0xFC | (Uint8) ((ch >> 30) & 0x01);
   673                     p[1] = 0x80 | (Uint8) ((ch >> 24) & 0x3F);
   674                     p[2] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
   675                     p[3] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   676                     p[4] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   677                     p[5] = 0x80 | (Uint8) (ch & 0x3F);
   678                     dst += 6;
   679                     dstlen -= 6;
   680                 }
   681             }
   682             break;
   683         case ENCODING_UTF16BE: /* RFC 2781 */
   684             {
   685                 Uint8 *p = (Uint8 *) dst;
   686                 if (ch > 0x10FFFF) {
   687                     ch = UNKNOWN_UNICODE;
   688                 }
   689                 if (ch < 0x10000) {
   690                     if (dstlen < 2) {
   691                         return SDL_ICONV_E2BIG;
   692                     }
   693                     p[0] = (Uint8) (ch >> 8);
   694                     p[1] = (Uint8) ch;
   695                     dst += 2;
   696                     dstlen -= 2;
   697                 } else {
   698                     Uint16 W1, W2;
   699                     if (dstlen < 4) {
   700                         return SDL_ICONV_E2BIG;
   701                     }
   702                     ch = ch - 0x10000;
   703                     W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
   704                     W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
   705                     p[0] = (Uint8) (W1 >> 8);
   706                     p[1] = (Uint8) W1;
   707                     p[2] = (Uint8) (W2 >> 8);
   708                     p[3] = (Uint8) W2;
   709                     dst += 4;
   710                     dstlen -= 4;
   711                 }
   712             }
   713             break;
   714         case ENCODING_UTF16LE: /* RFC 2781 */
   715             {
   716                 Uint8 *p = (Uint8 *) dst;
   717                 if (ch > 0x10FFFF) {
   718                     ch = UNKNOWN_UNICODE;
   719                 }
   720                 if (ch < 0x10000) {
   721                     if (dstlen < 2) {
   722                         return SDL_ICONV_E2BIG;
   723                     }
   724                     p[1] = (Uint8) (ch >> 8);
   725                     p[0] = (Uint8) ch;
   726                     dst += 2;
   727                     dstlen -= 2;
   728                 } else {
   729                     Uint16 W1, W2;
   730                     if (dstlen < 4) {
   731                         return SDL_ICONV_E2BIG;
   732                     }
   733                     ch = ch - 0x10000;
   734                     W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
   735                     W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
   736                     p[1] = (Uint8) (W1 >> 8);
   737                     p[0] = (Uint8) W1;
   738                     p[3] = (Uint8) (W2 >> 8);
   739                     p[2] = (Uint8) W2;
   740                     dst += 4;
   741                     dstlen -= 4;
   742                 }
   743             }
   744             break;
   745         case ENCODING_UCS2BE:
   746             {
   747                 Uint8 *p = (Uint8 *) dst;
   748                 if (ch > 0xFFFF) {
   749                     ch = UNKNOWN_UNICODE;
   750                 }
   751                 if (dstlen < 2) {
   752                     return SDL_ICONV_E2BIG;
   753                 }
   754                 p[0] = (Uint8) (ch >> 8);
   755                 p[1] = (Uint8) ch;
   756                 dst += 2;
   757                 dstlen -= 2;
   758             }
   759             break;
   760         case ENCODING_UCS2LE:
   761             {
   762                 Uint8 *p = (Uint8 *) dst;
   763                 if (ch > 0xFFFF) {
   764                     ch = UNKNOWN_UNICODE;
   765                 }
   766                 if (dstlen < 2) {
   767                     return SDL_ICONV_E2BIG;
   768                 }
   769                 p[1] = (Uint8) (ch >> 8);
   770                 p[0] = (Uint8) ch;
   771                 dst += 2;
   772                 dstlen -= 2;
   773             }
   774             break;
   775         case ENCODING_UTF32BE:
   776             if (ch > 0x10FFFF) {
   777                 ch = UNKNOWN_UNICODE;
   778             }
   779         case ENCODING_UCS4BE:
   780             if (ch > 0x7FFFFFFF) {
   781                 ch = UNKNOWN_UNICODE;
   782             }
   783             {
   784                 Uint8 *p = (Uint8 *) dst;
   785                 if (dstlen < 4) {
   786                     return SDL_ICONV_E2BIG;
   787                 }
   788                 p[0] = (Uint8) (ch >> 24);
   789                 p[1] = (Uint8) (ch >> 16);
   790                 p[2] = (Uint8) (ch >> 8);
   791                 p[3] = (Uint8) ch;
   792                 dst += 4;
   793                 dstlen -= 4;
   794             }
   795             break;
   796         case ENCODING_UTF32LE:
   797             if (ch > 0x10FFFF) {
   798                 ch = UNKNOWN_UNICODE;
   799             }
   800         case ENCODING_UCS4LE:
   801             if (ch > 0x7FFFFFFF) {
   802                 ch = UNKNOWN_UNICODE;
   803             }
   804             {
   805                 Uint8 *p = (Uint8 *) dst;
   806                 if (dstlen < 4) {
   807                     return SDL_ICONV_E2BIG;
   808                 }
   809                 p[3] = (Uint8) (ch >> 24);
   810                 p[2] = (Uint8) (ch >> 16);
   811                 p[1] = (Uint8) (ch >> 8);
   812                 p[0] = (Uint8) ch;
   813                 dst += 4;
   814                 dstlen -= 4;
   815             }
   816             break;
   817         }
   818 
   819         /* Update state */
   820         *inbuf = src;
   821         *inbytesleft = srclen;
   822         *outbuf = dst;
   823         *outbytesleft = dstlen;
   824         ++total;
   825     }
   826     return total;
   827 }
   828 
   829 int
   830 SDL_iconv_close(SDL_iconv_t cd)
   831 {
   832     if (cd && cd != (SDL_iconv_t) - 1) {
   833         SDL_free(cd);
   834     }
   835     return 0;
   836 }
   837 
   838 #endif /* !HAVE_ICONV */
   839 
   840 char *
   841 SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf,
   842                  size_t inbytesleft)
   843 {
   844     SDL_iconv_t cd;
   845     char *string;
   846     size_t stringsize;
   847     char *outbuf;
   848     size_t outbytesleft;
   849     size_t retCode = 0;
   850 
   851     cd = SDL_iconv_open(tocode, fromcode);
   852     if (cd == (SDL_iconv_t) - 1) {
   853         /* See if we can recover here (fixes iconv on Solaris 11) */
   854         if (!tocode || !*tocode) {
   855             tocode = "UTF-8";
   856         }
   857         if (!fromcode || !*fromcode) {
   858             fromcode = "UTF-8";
   859         }
   860         cd = SDL_iconv_open(tocode, fromcode);
   861     }
   862     if (cd == (SDL_iconv_t) - 1) {
   863         return NULL;
   864     }
   865 
   866     stringsize = inbytesleft > 4 ? inbytesleft : 4;
   867     string = SDL_malloc(stringsize);
   868     if (!string) {
   869         SDL_iconv_close(cd);
   870         return NULL;
   871     }
   872     outbuf = string;
   873     outbytesleft = stringsize;
   874     SDL_memset(outbuf, 0, 4);
   875 
   876     while (inbytesleft > 0) {
   877         retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
   878         switch (retCode) {
   879         case SDL_ICONV_E2BIG:
   880             {
   881                 char *oldstring = string;
   882                 stringsize *= 2;
   883                 string = SDL_realloc(string, stringsize);
   884                 if (!string) {
   885                     SDL_iconv_close(cd);
   886                     return NULL;
   887                 }
   888                 outbuf = string + (outbuf - oldstring);
   889                 outbytesleft = stringsize - (outbuf - string);
   890                 SDL_memset(outbuf, 0, 4);
   891             }
   892             break;
   893         case SDL_ICONV_EILSEQ:
   894             /* Try skipping some input data - not perfect, but... */
   895             ++inbuf;
   896             --inbytesleft;
   897             break;
   898         case SDL_ICONV_EINVAL:
   899         case SDL_ICONV_ERROR:
   900             /* We can't continue... */
   901             inbytesleft = 0;
   902             break;
   903         }
   904     }
   905     SDL_iconv_close(cd);
   906 
   907     return string;
   908 }
   909 
   910 /* vi: set ts=4 sw=4 expandtab: */