src/stdlib/SDL_iconv.c
author Sam Lantinga <slouken@libsdl.org>
Fri, 11 Dec 2009 08:03:43 +0000
changeset 3539 f2846bf19360
parent 3320 b225c8a7896e
child 3697 f7b03b6838cb
permissions -rw-r--r--
Fixed bug #896

John Popplewell 2009-12-08 23:05:50 PST

Originally reported by AKFoerster on the mailing list.

Error decoding UTF8 Russian text to UTF-16LE on Windows, but specifically on
platforms without iconv support (the default on Windows).

Valid UTF8 characters are flagged as being overlong and then substituted by the
UNKNOWN_UNICODE character.

After studying the testiconv.c example program, reading the RFCs and putting
some printf statements in SDL_iconv.c the problem is in a test for 'Maximum
overlong sequences', specifically 4.2.1, which is carried out by the following
code:

} else if ( p[0] >= 0xC0 ) {
if ( (p[0] & 0xE0) != 0xC0 ) {
/* Skip illegal sequences
return SDL_ICONV_EILSEQ;
*/
ch = UNKNOWN_UNICODE;
} else {
if ( (p[0] & 0xCE) == 0xC0 ) { <<<<<<<< here
overlong = SDL_TRUE;
}
ch = (Uint32)(p[0] & 0x1F);
left = 1;
}
} else {

Here is the 2-byte encoding of a character in range 00000080 - 000007FF
110xxxxx 10xxxxxx

The line in question is supposed to detect an overlong sequence, i.e. any
two-byte sequence up to and including
11000001 10111111

whose value (below 0x80) should be represented as a single byte.

BUT the mask value (0xCE) is wrong: it does not test bit 4 (0x10), the top-most of the five payload bits in the lead byte:
11000001 value
11001110 mask (incorrect)
^
and should be (0xDE):
11000001 value
11011110 mask (correct)

making the above code:

} else if ( p[0] >= 0xC0 ) {
if ( (p[0] & 0xE0) != 0xC0 ) {
/* Skip illegal sequences
return SDL_ICONV_EILSEQ;
*/
ch = UNKNOWN_UNICODE;
} else {
if ( (p[0] & 0xDE) == 0xC0 ) { <<<<<<<< here
overlong = SDL_TRUE;
}
ch = (Uint32)(p[0] & 0x1F);
left = 1;
}
} else {

I can supply a test program and/or a patch if required,

best regards,
John Popplewell
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2009 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 /* This file contains portable iconv functions for SDL */
    25 
    26 #include "SDL_stdinc.h"
    27 #include "SDL_endian.h"
    28 
    29 #ifdef HAVE_ICONV
    30 
    31 /* Depending on which standard the iconv() was implemented with,
    32    iconv() may or may not use const char ** for the inbuf param.
    33    If we get this wrong, it's just a warning, so no big deal.
    34 */
    35 #if defined(_XGP6) || \
    36     defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2))
    37 #define ICONV_INBUF_NONCONST
    38 #endif
    39 
    40 #include <errno.h>
    41 
    42 size_t
    43 SDL_iconv(SDL_iconv_t cd,
    44           const char **inbuf, size_t * inbytesleft,
    45           char **outbuf, size_t * outbytesleft)
    46 {
    47     size_t retCode;
    48 #ifdef ICONV_INBUF_NONCONST
    49     retCode = iconv(cd, (char **) inbuf, inbytesleft, outbuf, outbytesleft);
    50 #else
    51     retCode = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
    52 #endif
    53     if (retCode == (size_t) - 1) {
    54         switch (errno) {
    55         case E2BIG:
    56             return SDL_ICONV_E2BIG;
    57         case EILSEQ:
    58             return SDL_ICONV_EILSEQ;
    59         case EINVAL:
    60             return SDL_ICONV_EINVAL;
    61         default:
    62             return SDL_ICONV_ERROR;
    63         }
    64     }
    65     return retCode;
    66 }
    67 
    68 #else
    69 
    70 /* Lots of useful information on Unicode at:
    71 	http://www.cl.cam.ac.uk/~mgk25/unicode.html
    72 */
    73 
    74 #define UNICODE_BOM	0xFEFF
    75 
    76 #define UNKNOWN_ASCII	'?'
    77 #define UNKNOWN_UNICODE	0xFFFD
    78 
    79 enum
    80 {
    81     ENCODING_UNKNOWN,
    82     ENCODING_ASCII,
    83     ENCODING_LATIN1,
    84     ENCODING_UTF8,
    85     ENCODING_UTF16,             /* Needs byte order marker */
    86     ENCODING_UTF16BE,
    87     ENCODING_UTF16LE,
    88     ENCODING_UTF32,             /* Needs byte order marker */
    89     ENCODING_UTF32BE,
    90     ENCODING_UTF32LE,
    91     ENCODING_UCS2,              /* Native byte order assumed */
    92     ENCODING_UCS4,              /* Native byte order assumed */
    93 };
    94 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
    95 #define ENCODING_UTF16NATIVE	ENCODING_UTF16BE
    96 #define ENCODING_UTF32NATIVE	ENCODING_UTF32BE
    97 #else
    98 #define ENCODING_UTF16NATIVE	ENCODING_UTF16LE
    99 #define ENCODING_UTF32NATIVE	ENCODING_UTF32LE
   100 #endif
   101 
   102 struct _SDL_iconv_t
   103 {
   104     int src_fmt;
   105     int dst_fmt;
   106 };
   107 
   108 static struct
   109 {
   110     const char *name;
   111     int format;
   112 } encodings[] = {
   113 /* *INDENT-OFF* */
   114     { "ASCII", ENCODING_ASCII },
   115     { "US-ASCII", ENCODING_ASCII },
   116     { "8859-1", ENCODING_LATIN1 },
   117     { "ISO-8859-1", ENCODING_LATIN1 },
   118     { "UTF8", ENCODING_UTF8 },
   119     { "UTF-8", ENCODING_UTF8 },
   120     { "UTF16", ENCODING_UTF16 },
   121     { "UTF-16", ENCODING_UTF16 },
   122     { "UTF16BE", ENCODING_UTF16BE },
   123     { "UTF-16BE", ENCODING_UTF16BE },
   124     { "UTF16LE", ENCODING_UTF16LE },
   125     { "UTF-16LE", ENCODING_UTF16LE },
   126     { "UTF32", ENCODING_UTF32 },
   127     { "UTF-32", ENCODING_UTF32 },
   128     { "UTF32BE", ENCODING_UTF32BE },
   129     { "UTF-32BE", ENCODING_UTF32BE },
   130     { "UTF32LE", ENCODING_UTF32LE },
   131     { "UTF-32LE", ENCODING_UTF32LE },
   132     { "UCS2", ENCODING_UCS2 },
   133     { "UCS-2", ENCODING_UCS2 },
   134     { "UCS4", ENCODING_UCS4 },
   135     { "UCS-4", ENCODING_UCS4 },
   136 /* *INDENT-ON* */
   137 };
   138 
   139 static const char *
   140 getlocale(char *buffer, size_t bufsize)
   141 {
   142     const char *lang;
   143     char *ptr;
   144 
   145     lang = SDL_getenv("LC_ALL");
   146     if (!lang) {
   147         lang = SDL_getenv("LC_CTYPE");
   148     }
   149     if (!lang) {
   150         lang = SDL_getenv("LC_MESSAGES");
   151     }
   152     if (!lang) {
   153         lang = SDL_getenv("LANG");
   154     }
   155     if (!lang || !*lang || SDL_strcmp(lang, "C") == 0) {
   156         lang = "ASCII";
   157     }
   158 
   159     /* We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8" */
   160     ptr = SDL_strchr(lang, '.');
   161     if (ptr != NULL) {
   162         lang = ptr + 1;
   163     }
   164 
   165     SDL_strlcpy(buffer, lang, bufsize);
   166     ptr = SDL_strchr(buffer, '@');
   167     if (ptr != NULL) {
   168         *ptr = '\0';            /* chop end of string. */
   169     }
   170 
   171     return buffer;
   172 }
   173 
   174 SDL_iconv_t
   175 SDL_iconv_open(const char *tocode, const char *fromcode)
   176 {
   177     int src_fmt = ENCODING_UNKNOWN;
   178     int dst_fmt = ENCODING_UNKNOWN;
   179     int i;
   180     char fromcode_buffer[64];
   181     char tocode_buffer[64];
   182 
   183     if (!fromcode || !*fromcode) {
   184         fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
   185     }
   186     if (!tocode || !*tocode) {
   187         tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
   188     }
   189     for (i = 0; i < SDL_arraysize(encodings); ++i) {
   190         if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) {
   191             src_fmt = encodings[i].format;
   192             if (dst_fmt != ENCODING_UNKNOWN) {
   193                 break;
   194             }
   195         }
   196         if (SDL_strcasecmp(tocode, encodings[i].name) == 0) {
   197             dst_fmt = encodings[i].format;
   198             if (src_fmt != ENCODING_UNKNOWN) {
   199                 break;
   200             }
   201         }
   202     }
   203     if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) {
   204         SDL_iconv_t cd = (SDL_iconv_t) SDL_malloc(sizeof(*cd));
   205         if (cd) {
   206             cd->src_fmt = src_fmt;
   207             cd->dst_fmt = dst_fmt;
   208             return cd;
   209         }
   210     }
   211     return (SDL_iconv_t) - 1;
   212 }
   213 
   214 size_t
   215 SDL_iconv(SDL_iconv_t cd,
   216           const char **inbuf, size_t * inbytesleft,
   217           char **outbuf, size_t * outbytesleft)
   218 {
   219     /* For simplicity, we'll convert everything to and from UCS-4 */
   220     const char *src;
   221     char *dst;
   222     size_t srclen, dstlen;
   223     Uint32 ch = 0;
   224     size_t total;
   225 
   226     if (!inbuf || !*inbuf) {
   227         /* Reset the context */
   228         return 0;
   229     }
   230     if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
   231         return SDL_ICONV_E2BIG;
   232     }
   233     src = *inbuf;
   234     srclen = (inbytesleft ? *inbytesleft : 0);
   235     dst = *outbuf;
   236     dstlen = *outbytesleft;
   237 
   238     switch (cd->src_fmt) {
   239     case ENCODING_UTF16:
   240         /* Scan for a byte order marker */
   241         {
   242             Uint8 *p = (Uint8 *) src;
   243             size_t n = srclen / 2;
   244             while (n) {
   245                 if (p[0] == 0xFF && p[1] == 0xFE) {
   246                     cd->src_fmt = ENCODING_UTF16BE;
   247                     break;
   248                 } else if (p[0] == 0xFE && p[1] == 0xFF) {
   249                     cd->src_fmt = ENCODING_UTF16LE;
   250                     break;
   251                 }
   252                 p += 2;
   253                 --n;
   254             }
   255             if (n == 0) {
   256                 /* We can't tell, default to host order */
   257                 cd->src_fmt = ENCODING_UTF16NATIVE;
   258             }
   259         }
   260         break;
   261     case ENCODING_UTF32:
   262         /* Scan for a byte order marker */
   263         {
   264             Uint8 *p = (Uint8 *) src;
   265             size_t n = srclen / 4;
   266             while (n) {
   267                 if (p[0] == 0xFF && p[1] == 0xFE &&
   268                     p[2] == 0x00 && p[3] == 0x00) {
   269                     cd->src_fmt = ENCODING_UTF32BE;
   270                     break;
   271                 } else if (p[0] == 0x00 && p[1] == 0x00 &&
   272                            p[2] == 0xFE && p[3] == 0xFF) {
   273                     cd->src_fmt = ENCODING_UTF32LE;
   274                     break;
   275                 }
   276                 p += 4;
   277                 --n;
   278             }
   279             if (n == 0) {
   280                 /* We can't tell, default to host order */
   281                 cd->src_fmt = ENCODING_UTF32NATIVE;
   282             }
   283         }
   284         break;
   285     }
   286 
   287     switch (cd->dst_fmt) {
   288     case ENCODING_UTF16:
   289         /* Default to host order, need to add byte order marker */
   290         if (dstlen < 2) {
   291             return SDL_ICONV_E2BIG;
   292         }
   293         *(Uint16 *) dst = UNICODE_BOM;
   294         dst += 2;
   295         dstlen -= 2;
   296         cd->dst_fmt = ENCODING_UTF16NATIVE;
   297         break;
   298     case ENCODING_UTF32:
   299         /* Default to host order, need to add byte order marker */
   300         if (dstlen < 4) {
   301             return SDL_ICONV_E2BIG;
   302         }
   303         *(Uint32 *) dst = UNICODE_BOM;
   304         dst += 4;
   305         dstlen -= 4;
   306         cd->dst_fmt = ENCODING_UTF32NATIVE;
   307         break;
   308     }
   309 
   310     total = 0;
   311     while (srclen > 0) {
   312         /* Decode a character */
   313         switch (cd->src_fmt) {
   314         case ENCODING_ASCII:
   315             {
   316                 Uint8 *p = (Uint8 *) src;
   317                 ch = (Uint32) (p[0] & 0x7F);
   318                 ++src;
   319                 --srclen;
   320             }
   321             break;
   322         case ENCODING_LATIN1:
   323             {
   324                 Uint8 *p = (Uint8 *) src;
   325                 ch = (Uint32) p[0];
   326                 ++src;
   327                 --srclen;
   328             }
   329             break;
   330         case ENCODING_UTF8:    /* RFC 3629 */
   331             {
   332                 Uint8 *p = (Uint8 *) src;
   333                 size_t left = 0;
   334                 SDL_bool overlong = SDL_FALSE;
   335                 if (p[0] >= 0xFC) {
   336                     if ((p[0] & 0xFE) != 0xFC) {
   337                         /* Skip illegal sequences
   338                            return SDL_ICONV_EILSEQ;
   339                          */
   340                         ch = UNKNOWN_UNICODE;
   341                     } else {
   342                         if (p[0] == 0xFC) {
   343                             overlong = SDL_TRUE;
   344                         }
   345                         ch = (Uint32) (p[0] & 0x01);
   346                         left = 5;
   347                     }
   348                 } else if (p[0] >= 0xF8) {
   349                     if ((p[0] & 0xFC) != 0xF8) {
   350                         /* Skip illegal sequences
   351                            return SDL_ICONV_EILSEQ;
   352                          */
   353                         ch = UNKNOWN_UNICODE;
   354                     } else {
   355                         if (p[0] == 0xF8) {
   356                             overlong = SDL_TRUE;
   357                         }
   358                         ch = (Uint32) (p[0] & 0x03);
   359                         left = 4;
   360                     }
   361                 } else if (p[0] >= 0xF0) {
   362                     if ((p[0] & 0xF8) != 0xF0) {
   363                         /* Skip illegal sequences
   364                            return SDL_ICONV_EILSEQ;
   365                          */
   366                         ch = UNKNOWN_UNICODE;
   367                     } else {
   368                         if (p[0] == 0xF0) {
   369                             overlong = SDL_TRUE;
   370                         }
   371                         ch = (Uint32) (p[0] & 0x07);
   372                         left = 3;
   373                     }
   374                 } else if (p[0] >= 0xE0) {
   375                     if ((p[0] & 0xF0) != 0xE0) {
   376                         /* Skip illegal sequences
   377                            return SDL_ICONV_EILSEQ;
   378                          */
   379                         ch = UNKNOWN_UNICODE;
   380                     } else {
   381                         if (p[0] == 0xE0) {
   382                             overlong = SDL_TRUE;
   383                         }
   384                         ch = (Uint32) (p[0] & 0x0F);
   385                         left = 2;
   386                     }
   387                 } else if (p[0] >= 0xC0) {
   388                     if ((p[0] & 0xE0) != 0xC0) {
   389                         /* Skip illegal sequences
   390                            return SDL_ICONV_EILSEQ;
   391                          */
   392                         ch = UNKNOWN_UNICODE;
   393                     } else {
   394                         if ((p[0] & 0xDE) == 0xC0) {
   395                             overlong = SDL_TRUE;
   396                         }
   397                         ch = (Uint32) (p[0] & 0x1F);
   398                         left = 1;
   399                     }
   400                 } else {
   401                     if ((p[0] & 0x80) != 0x00) {
   402                         /* Skip illegal sequences
   403                            return SDL_ICONV_EILSEQ;
   404                          */
   405                         ch = UNKNOWN_UNICODE;
   406                     } else {
   407                         ch = (Uint32) p[0];
   408                     }
   409                 }
   410                 ++src;
   411                 --srclen;
   412                 if (srclen < left) {
   413                     return SDL_ICONV_EINVAL;
   414                 }
   415                 while (left--) {
   416                     ++p;
   417                     if ((p[0] & 0xC0) != 0x80) {
   418                         /* Skip illegal sequences
   419                            return SDL_ICONV_EILSEQ;
   420                          */
   421                         ch = UNKNOWN_UNICODE;
   422                         break;
   423                     }
   424                     ch <<= 6;
   425                     ch |= (p[0] & 0x3F);
   426                     ++src;
   427                     --srclen;
   428                 }
   429                 if (overlong) {
   430                     /* Potential security risk
   431                        return SDL_ICONV_EILSEQ;
   432                      */
   433                     ch = UNKNOWN_UNICODE;
   434                 }
   435                 if ((ch >= 0xD800 && ch <= 0xDFFF) ||
   436                     (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
   437                     /* Skip illegal sequences
   438                        return SDL_ICONV_EILSEQ;
   439                      */
   440                     ch = UNKNOWN_UNICODE;
   441                 }
   442             }
   443             break;
   444         case ENCODING_UTF16BE: /* RFC 2781 */
   445             {
   446                 Uint8 *p = (Uint8 *) src;
   447                 Uint16 W1, W2;
   448                 if (srclen < 2) {
   449                     return SDL_ICONV_EINVAL;
   450                 }
   451                 W1 = ((Uint16) p[0] << 8) | (Uint16) p[1];
   452                 src += 2;
   453                 srclen -= 2;
   454                 if (W1 < 0xD800 || W1 > 0xDFFF) {
   455                     ch = (Uint32) W1;
   456                     break;
   457                 }
   458                 if (W1 > 0xDBFF) {
   459                     /* Skip illegal sequences
   460                        return SDL_ICONV_EILSEQ;
   461                      */
   462                     ch = UNKNOWN_UNICODE;
   463                     break;
   464                 }
   465                 if (srclen < 2) {
   466                     return SDL_ICONV_EINVAL;
   467                 }
   468                 p = (Uint8 *) src;
   469                 W2 = ((Uint16) p[0] << 8) | (Uint16) p[1];
   470                 src += 2;
   471                 srclen -= 2;
   472                 if (W2 < 0xDC00 || W2 > 0xDFFF) {
   473                     /* Skip illegal sequences
   474                        return SDL_ICONV_EILSEQ;
   475                      */
   476                     ch = UNKNOWN_UNICODE;
   477                     break;
   478                 }
   479                 ch = (((Uint32) (W1 & 0x3FF) << 10) |
   480                       (Uint32) (W2 & 0x3FF)) + 0x10000;
   481             }
   482             break;
   483         case ENCODING_UTF16LE: /* RFC 2781 */
   484             {
   485                 Uint8 *p = (Uint8 *) src;
   486                 Uint16 W1, W2;
   487                 if (srclen < 2) {
   488                     return SDL_ICONV_EINVAL;
   489                 }
   490                 W1 = ((Uint16) p[1] << 8) | (Uint16) p[0];
   491                 src += 2;
   492                 srclen -= 2;
   493                 if (W1 < 0xD800 || W1 > 0xDFFF) {
   494                     ch = (Uint32) W1;
   495                     break;
   496                 }
   497                 if (W1 > 0xDBFF) {
   498                     /* Skip illegal sequences
   499                        return SDL_ICONV_EILSEQ;
   500                      */
   501                     ch = UNKNOWN_UNICODE;
   502                     break;
   503                 }
   504                 if (srclen < 2) {
   505                     return SDL_ICONV_EINVAL;
   506                 }
   507                 p = (Uint8 *) src;
   508                 W2 = ((Uint16) p[1] << 8) | (Uint16) p[0];
   509                 src += 2;
   510                 srclen -= 2;
   511                 if (W2 < 0xDC00 || W2 > 0xDFFF) {
   512                     /* Skip illegal sequences
   513                        return SDL_ICONV_EILSEQ;
   514                      */
   515                     ch = UNKNOWN_UNICODE;
   516                     break;
   517                 }
   518                 ch = (((Uint32) (W1 & 0x3FF) << 10) |
   519                       (Uint32) (W2 & 0x3FF)) + 0x10000;
   520             }
   521             break;
   522         case ENCODING_UTF32BE:
   523             {
   524                 Uint8 *p = (Uint8 *) src;
   525                 if (srclen < 4) {
   526                     return SDL_ICONV_EINVAL;
   527                 }
   528                 ch = ((Uint32) p[0] << 24) |
   529                     ((Uint32) p[1] << 16) |
   530                     ((Uint32) p[2] << 8) | (Uint32) p[3];
   531                 src += 4;
   532                 srclen -= 4;
   533             }
   534             break;
   535         case ENCODING_UTF32LE:
   536             {
   537                 Uint8 *p = (Uint8 *) src;
   538                 if (srclen < 4) {
   539                     return SDL_ICONV_EINVAL;
   540                 }
   541                 ch = ((Uint32) p[3] << 24) |
   542                     ((Uint32) p[2] << 16) |
   543                     ((Uint32) p[1] << 8) | (Uint32) p[0];
   544                 src += 4;
   545                 srclen -= 4;
   546             }
   547             break;
   548         case ENCODING_UCS2:
   549             {
   550                 Uint16 *p = (Uint16 *) src;
   551                 if (srclen < 2) {
   552                     return SDL_ICONV_EINVAL;
   553                 }
   554                 ch = *p;
   555                 src += 2;
   556                 srclen -= 2;
   557             }
   558             break;
   559         case ENCODING_UCS4:
   560             {
   561                 Uint32 *p = (Uint32 *) src;
   562                 if (srclen < 4) {
   563                     return SDL_ICONV_EINVAL;
   564                 }
   565                 ch = *p;
   566                 src += 4;
   567                 srclen -= 4;
   568             }
   569             break;
   570         }
   571 
   572         /* Encode a character */
   573         switch (cd->dst_fmt) {
   574         case ENCODING_ASCII:
   575             {
   576                 Uint8 *p = (Uint8 *) dst;
   577                 if (dstlen < 1) {
   578                     return SDL_ICONV_E2BIG;
   579                 }
   580                 if (ch > 0x7F) {
   581                     *p = UNKNOWN_ASCII;
   582                 } else {
   583                     *p = (Uint8) ch;
   584                 }
   585                 ++dst;
   586                 --dstlen;
   587             }
   588             break;
   589         case ENCODING_LATIN1:
   590             {
   591                 Uint8 *p = (Uint8 *) dst;
   592                 if (dstlen < 1) {
   593                     return SDL_ICONV_E2BIG;
   594                 }
   595                 if (ch > 0xFF) {
   596                     *p = UNKNOWN_ASCII;
   597                 } else {
   598                     *p = (Uint8) ch;
   599                 }
   600                 ++dst;
   601                 --dstlen;
   602             }
   603             break;
   604         case ENCODING_UTF8:    /* RFC 3629 */
   605             {
   606                 Uint8 *p = (Uint8 *) dst;
   607                 if (ch > 0x10FFFF) {
   608                     ch = UNKNOWN_UNICODE;
   609                 }
   610                 if (ch <= 0x7F) {
   611                     if (dstlen < 1) {
   612                         return SDL_ICONV_E2BIG;
   613                     }
   614                     *p = (Uint8) ch;
   615                     ++dst;
   616                     --dstlen;
   617                 } else if (ch <= 0x7FF) {
   618                     if (dstlen < 2) {
   619                         return SDL_ICONV_E2BIG;
   620                     }
   621                     p[0] = 0xC0 | (Uint8) ((ch >> 6) & 0x1F);
   622                     p[1] = 0x80 | (Uint8) (ch & 0x3F);
   623                     dst += 2;
   624                     dstlen -= 2;
   625                 } else if (ch <= 0xFFFF) {
   626                     if (dstlen < 3) {
   627                         return SDL_ICONV_E2BIG;
   628                     }
   629                     p[0] = 0xE0 | (Uint8) ((ch >> 12) & 0x0F);
   630                     p[1] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   631                     p[2] = 0x80 | (Uint8) (ch & 0x3F);
   632                     dst += 3;
   633                     dstlen -= 3;
   634                 } else if (ch <= 0x1FFFFF) {
   635                     if (dstlen < 4) {
   636                         return SDL_ICONV_E2BIG;
   637                     }
   638                     p[0] = 0xF0 | (Uint8) ((ch >> 18) & 0x07);
   639                     p[1] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   640                     p[2] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   641                     p[3] = 0x80 | (Uint8) (ch & 0x3F);
   642                     dst += 4;
   643                     dstlen -= 4;
   644                 } else if (ch <= 0x3FFFFFF) {
   645                     if (dstlen < 5) {
   646                         return SDL_ICONV_E2BIG;
   647                     }
   648                     p[0] = 0xF8 | (Uint8) ((ch >> 24) & 0x03);
   649                     p[1] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
   650                     p[2] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   651                     p[3] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   652                     p[4] = 0x80 | (Uint8) (ch & 0x3F);
   653                     dst += 5;
   654                     dstlen -= 5;
   655                 } else {
   656                     if (dstlen < 6) {
   657                         return SDL_ICONV_E2BIG;
   658                     }
   659                     p[0] = 0xFC | (Uint8) ((ch >> 30) & 0x01);
   660                     p[1] = 0x80 | (Uint8) ((ch >> 24) & 0x3F);
   661                     p[2] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
   662                     p[3] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   663                     p[4] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   664                     p[5] = 0x80 | (Uint8) (ch & 0x3F);
   665                     dst += 6;
   666                     dstlen -= 6;
   667                 }
   668             }
   669             break;
   670         case ENCODING_UTF16BE: /* RFC 2781 */
   671             {
   672                 Uint8 *p = (Uint8 *) dst;
   673                 if (ch > 0x10FFFF) {
   674                     ch = UNKNOWN_UNICODE;
   675                 }
   676                 if (ch < 0x10000) {
   677                     if (dstlen < 2) {
   678                         return SDL_ICONV_E2BIG;
   679                     }
   680                     p[0] = (Uint8) (ch >> 8);
   681                     p[1] = (Uint8) ch;
   682                     dst += 2;
   683                     dstlen -= 2;
   684                 } else {
   685                     Uint16 W1, W2;
   686                     if (dstlen < 4) {
   687                         return SDL_ICONV_E2BIG;
   688                     }
   689                     ch = ch - 0x10000;
   690                     W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
   691                     W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
   692                     p[0] = (Uint8) (W1 >> 8);
   693                     p[1] = (Uint8) W1;
   694                     p[2] = (Uint8) (W2 >> 8);
   695                     p[3] = (Uint8) W2;
   696                     dst += 4;
   697                     dstlen -= 4;
   698                 }
   699             }
   700             break;
   701         case ENCODING_UTF16LE: /* RFC 2781 */
   702             {
   703                 Uint8 *p = (Uint8 *) dst;
   704                 if (ch > 0x10FFFF) {
   705                     ch = UNKNOWN_UNICODE;
   706                 }
   707                 if (ch < 0x10000) {
   708                     if (dstlen < 2) {
   709                         return SDL_ICONV_E2BIG;
   710                     }
   711                     p[1] = (Uint8) (ch >> 8);
   712                     p[0] = (Uint8) ch;
   713                     dst += 2;
   714                     dstlen -= 2;
   715                 } else {
   716                     Uint16 W1, W2;
   717                     if (dstlen < 4) {
   718                         return SDL_ICONV_E2BIG;
   719                     }
   720                     ch = ch - 0x10000;
   721                     W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
   722                     W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
   723                     p[1] = (Uint8) (W1 >> 8);
   724                     p[0] = (Uint8) W1;
   725                     p[3] = (Uint8) (W2 >> 8);
   726                     p[2] = (Uint8) W2;
   727                     dst += 4;
   728                     dstlen -= 4;
   729                 }
   730             }
   731             break;
   732         case ENCODING_UTF32BE:
   733             {
   734                 Uint8 *p = (Uint8 *) dst;
   735                 if (ch > 0x10FFFF) {
   736                     ch = UNKNOWN_UNICODE;
   737                 }
   738                 if (dstlen < 4) {
   739                     return SDL_ICONV_E2BIG;
   740                 }
   741                 p[0] = (Uint8) (ch >> 24);
   742                 p[1] = (Uint8) (ch >> 16);
   743                 p[2] = (Uint8) (ch >> 8);
   744                 p[3] = (Uint8) ch;
   745                 dst += 4;
   746                 dstlen -= 4;
   747             }
   748             break;
   749         case ENCODING_UTF32LE:
   750             {
   751                 Uint8 *p = (Uint8 *) dst;
   752                 if (ch > 0x10FFFF) {
   753                     ch = UNKNOWN_UNICODE;
   754                 }
   755                 if (dstlen < 4) {
   756                     return SDL_ICONV_E2BIG;
   757                 }
   758                 p[3] = (Uint8) (ch >> 24);
   759                 p[2] = (Uint8) (ch >> 16);
   760                 p[1] = (Uint8) (ch >> 8);
   761                 p[0] = (Uint8) ch;
   762                 dst += 4;
   763                 dstlen -= 4;
   764             }
   765             break;
   766         case ENCODING_UCS2:
   767             {
   768                 Uint16 *p = (Uint16 *) dst;
   769                 if (ch > 0xFFFF) {
   770                     ch = UNKNOWN_UNICODE;
   771                 }
   772                 if (dstlen < 2) {
   773                     return SDL_ICONV_E2BIG;
   774                 }
   775                 *p = (Uint16) ch;
   776                 dst += 2;
   777                 dstlen -= 2;
   778             }
   779             break;
   780         case ENCODING_UCS4:
   781             {
   782                 Uint32 *p = (Uint32 *) dst;
   783                 if (ch > 0x7FFFFFFF) {
   784                     ch = UNKNOWN_UNICODE;
   785                 }
   786                 if (dstlen < 4) {
   787                     return SDL_ICONV_E2BIG;
   788                 }
   789                 *p = ch;
   790                 dst += 4;
   791                 dstlen -= 4;
   792             }
   793             break;
   794         }
   795 
   796         /* Update state */
   797         *inbuf = src;
   798         *inbytesleft = srclen;
   799         *outbuf = dst;
   800         *outbytesleft = dstlen;
   801         ++total;
   802     }
   803     return total;
   804 }
   805 
   806 int
   807 SDL_iconv_close(SDL_iconv_t cd)
   808 {
   809     if (cd && cd != (SDL_iconv_t) - 1) {
   810         SDL_free(cd);
   811     }
   812     return 0;
   813 }
   814 
   815 #endif /* !HAVE_ICONV */
   816 
   817 char *
   818 SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf,
   819                  size_t inbytesleft)
   820 {
   821     SDL_iconv_t cd;
   822     char *string;
   823     size_t stringsize;
   824     char *outbuf;
   825     size_t outbytesleft;
   826     size_t retCode = 0;
   827 
   828     cd = SDL_iconv_open(tocode, fromcode);
   829     if (cd == (SDL_iconv_t) - 1) {
   830         /* See if we can recover here (fixes iconv on Solaris 11) */
   831         if (!tocode || !*tocode) {
   832             tocode = "UTF-8";
   833         }
   834         if (!fromcode || !*fromcode) {
   835             fromcode = "UTF-8";
   836         }
   837         cd = SDL_iconv_open(tocode, fromcode);
   838     }
   839     if (cd == (SDL_iconv_t) - 1) {
   840         return NULL;
   841     }
   842 
   843     stringsize = inbytesleft > 4 ? inbytesleft : 4;
   844     string = SDL_malloc(stringsize);
   845     if (!string) {
   846         SDL_iconv_close(cd);
   847         return NULL;
   848     }
   849     outbuf = string;
   850     outbytesleft = stringsize;
   851     SDL_memset(outbuf, 0, 4);
   852 
   853     while (inbytesleft > 0) {
   854         retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
   855         switch (retCode) {
   856         case SDL_ICONV_E2BIG:
   857             {
   858                 char *oldstring = string;
   859                 stringsize *= 2;
   860                 string = SDL_realloc(string, stringsize);
   861                 if (!string) {
   862                     SDL_iconv_close(cd);
   863                     return NULL;
   864                 }
   865                 outbuf = string + (outbuf - oldstring);
   866                 outbytesleft = stringsize - (outbuf - string);
   867                 SDL_memset(outbuf, 0, 4);
   868             }
   869             break;
   870         case SDL_ICONV_EILSEQ:
   871             /* Try skipping some input data - not perfect, but... */
   872             ++inbuf;
   873             --inbytesleft;
   874             break;
   875         case SDL_ICONV_EINVAL:
   876         case SDL_ICONV_ERROR:
   877             /* We can't continue... */
   878             inbytesleft = 0;
   879             break;
   880         }
   881     }
   882     SDL_iconv_close(cd);
   883 
   884     return string;
   885 }
   886 
   887 /* vi: set ts=4 sw=4 expandtab: */