src/stdlib/SDL_iconv.c
author Sam Lantinga <slouken@libsdl.org>
Thu, 12 Jul 2007 07:52:50 +0000
changeset 2182 cc2597da0840
parent 2143 e906da4414a3
child 2183 9f31740cad2e
permissions -rw-r--r--
Fixed bug #455

If passed "" for the character set, let iconv_open() interpret it as
locale.

This was merged from revisions 3234 and 3235 of SDL 1.2
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 /* This file contains portable iconv functions for SDL */
    25 
    26 #include "SDL_stdinc.h"
    27 #include "SDL_endian.h"
    28 
    29 #ifdef HAVE_ICONV
    30 
    31 /* Depending on which standard the iconv() was implemented with,
    32    iconv() may or may not use const char ** for the inbuf param.
    33    If we get this wrong, it's just a warning, so no big deal.
    34 */
    35 #if defined(_XGP6) || \
    36     defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2))
    37 #define ICONV_INBUF_NONCONST
    38 #endif
    39 
    40 #include <errno.h>
    41 
    42 size_t
    43 SDL_iconv(SDL_iconv_t cd,
    44           const char **inbuf, size_t * inbytesleft,
    45           char **outbuf, size_t * outbytesleft)
    46 {
    47     size_t retCode;
    48 #ifdef ICONV_INBUF_NONCONST
    49     retCode = iconv(cd, (char **) inbuf, inbytesleft, outbuf, outbytesleft);
    50 #else
    51     retCode = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
    52 #endif
    53     if (retCode == (size_t) - 1) {
    54         switch (errno) {
    55         case E2BIG:
    56             return SDL_ICONV_E2BIG;
    57         case EILSEQ:
    58             return SDL_ICONV_EILSEQ;
    59         case EINVAL:
    60             return SDL_ICONV_EINVAL;
    61         default:
    62             return SDL_ICONV_ERROR;
    63         }
    64     }
    65     return retCode;
    66 }
    67 
    68 #else
    69 
    70 /* Lots of useful information on Unicode at:
    71 	http://www.cl.cam.ac.uk/~mgk25/unicode.html
    72 */
    73 
    74 #define UNICODE_BOM	0xFEFF
    75 
    76 #define UNKNOWN_ASCII	'?'
    77 #define UNKNOWN_UNICODE	0xFFFD
    78 
    79 enum
    80 {
           /* Identifiers for the encodings the built-in converter handles.
              ENCODING_UNKNOWN is the initial value SDL_iconv_open() uses for
              a name it has not (yet) found in the encodings[] table. */
    81     ENCODING_UNKNOWN,
    82     ENCODING_ASCII,
    83     ENCODING_LATIN1,
    84     ENCODING_UTF8,
    85     ENCODING_UTF16,             /* Needs byte order marker */
    86     ENCODING_UTF16BE,
    87     ENCODING_UTF16LE,
    88     ENCODING_UTF32,             /* Needs byte order marker */
    89     ENCODING_UTF32BE,
    90     ENCODING_UTF32LE,
    91     ENCODING_UCS2,              /* Native byte order assumed */
    92     ENCODING_UCS4,              /* Native byte order assumed */
    93 };
           /* ENCODING_UTF16NATIVE / ENCODING_UTF32NATIVE name the explicit
              BE or LE variant selected by SDL_BYTEORDER; SDL_iconv() falls
              back to them when no byte order marker settles the question. */
    94 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
    95 #define ENCODING_UTF16NATIVE	ENCODING_UTF16BE
    96 #define ENCODING_UTF32NATIVE	ENCODING_UTF32BE
    97 #else
    98 #define ENCODING_UTF16NATIVE	ENCODING_UTF16LE
    99 #define ENCODING_UTF32NATIVE	ENCODING_UTF32LE
   100 #endif
   101 
   102 struct _SDL_iconv_t
   103 {
           /* Conversion descriptor for the built-in converter.  Both fields
              hold ENCODING_* values; SDL_iconv() overwrites a generic
              UTF16/UTF32 value with a concrete BE/LE variant on first use. */
   104     int src_fmt;
   105     int dst_fmt;
   106 };
   107 
   108 static struct
   109 {
           /* One row per accepted spelling of an encoding name.
              SDL_iconv_open() compares the requested names against `name`
              with SDL_strcasecmp() and stores `format` (an ENCODING_* value)
              in the descriptor. */
   110     const char *name;
   111     int format;
   112 } encodings[] = {
   113 /* *INDENT-OFF* */
   114     { "ASCII", ENCODING_ASCII },
   115     { "US-ASCII", ENCODING_ASCII },
   116     { "8859-1", ENCODING_LATIN1 },
   117     { "ISO-8859-1", ENCODING_LATIN1 },
   118     { "UTF8", ENCODING_UTF8 },
   119     { "UTF-8", ENCODING_UTF8 },
   120     { "UTF16", ENCODING_UTF16 },
   121     { "UTF-16", ENCODING_UTF16 },
   122     { "UTF16BE", ENCODING_UTF16BE },
   123     { "UTF-16BE", ENCODING_UTF16BE },
   124     { "UTF16LE", ENCODING_UTF16LE },
   125     { "UTF-16LE", ENCODING_UTF16LE },
   126     { "UTF32", ENCODING_UTF32 },
   127     { "UTF-32", ENCODING_UTF32 },
   128     { "UTF32BE", ENCODING_UTF32BE },
   129     { "UTF-32BE", ENCODING_UTF32BE },
   130     { "UTF32LE", ENCODING_UTF32LE },
   131     { "UTF-32LE", ENCODING_UTF32LE },
   132     { "UCS2", ENCODING_UCS2 },
   133     { "UCS-2", ENCODING_UCS2 },
   134     { "UCS4", ENCODING_UCS4 },
   135     { "UCS-4", ENCODING_UCS4 },
   136 /* *INDENT-ON* */
   137 };
   138 
   /*
    * Pick the character-set name to use when the caller did not supply one.
    * The first of LC_ALL, LC_CTYPE, LC_MESSAGES and LANG that is set in the
    * environment is returned; an unset or empty value, or the plain "C"
    * locale, is reported as "ASCII".
    */
   static const char *
   getlocale()
   {
       static const char *const variables[] = {
           "LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LANG"
       };
       const unsigned int count = sizeof(variables) / sizeof(variables[0]);
       const char *value = SDL_getenv(variables[0]);
       unsigned int idx;

       /* Stop at the first variable that exists, even if it is empty */
       for (idx = 1; !value && idx < count; ++idx) {
           value = SDL_getenv(variables[idx]);
       }

       if (!value || *value == '\0' || SDL_strcmp(value, "C") == 0) {
           return "ASCII";
       }
       return value;
   }
   159 
   160 SDL_iconv_t
   161 SDL_iconv_open(const char *tocode, const char *fromcode)
   162 {
   163     int src_fmt = ENCODING_UNKNOWN;
   164     int dst_fmt = ENCODING_UNKNOWN;
   165     int i;
   166 
   167     if (!fromcode || !*fromcode) {
   168         fromcode = getlocale();
   169     }
   170     if (!tocode || !*tocode) {
   171         fromcode = getlocale();
   172     }
   173     for (i = 0; i < SDL_arraysize(encodings); ++i) {
   174         if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) {
   175             src_fmt = encodings[i].format;
   176             if (dst_fmt != ENCODING_UNKNOWN) {
   177                 break;
   178             }
   179         }
   180         if (SDL_strcasecmp(tocode, encodings[i].name) == 0) {
   181             dst_fmt = encodings[i].format;
   182             if (src_fmt != ENCODING_UNKNOWN) {
   183                 break;
   184             }
   185         }
   186     }
   187     if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) {
   188         SDL_iconv_t cd = (SDL_iconv_t) SDL_malloc(sizeof(*cd));
   189         if (cd) {
   190             cd->src_fmt = src_fmt;
   191             cd->dst_fmt = dst_fmt;
   192             return cd;
   193         }
   194     }
   195     return (SDL_iconv_t) - 1;
   196 }
   197 
   198 size_t
   199 SDL_iconv(SDL_iconv_t cd,
   200           const char **inbuf, size_t * inbytesleft,
   201           char **outbuf, size_t * outbytesleft)
   202 {
   203     /* For simplicity, we'll convert everything to and from UCS-4 */
   204     const char *src;
   205     char *dst;
   206     size_t srclen, dstlen;
   207     Uint32 ch = 0;
   208     size_t total;
   209 
   210     if (!inbuf || !*inbuf) {
   211         /* Reset the context */
   212         return 0;
   213     }
   214     if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
   215         return SDL_ICONV_E2BIG;
   216     }
   217     src = *inbuf;
   218     srclen = (inbytesleft ? *inbytesleft : 0);
   219     dst = *outbuf;
   220     dstlen = *outbytesleft;
   221 
   222     switch (cd->src_fmt) {
   223     case ENCODING_UTF16:
   224         /* Scan for a byte order marker */
   225         {
   226             Uint8 *p = (Uint8 *) src;
   227             size_t n = srclen / 2;
   228             while (n) {
   229                 if (p[0] == 0xFF && p[1] == 0xFE) {
   230                     cd->src_fmt = ENCODING_UTF16BE;
   231                     break;
   232                 } else if (p[0] == 0xFE && p[1] == 0xFF) {
   233                     cd->src_fmt = ENCODING_UTF16LE;
   234                     break;
   235                 }
   236                 p += 2;
   237                 --n;
   238             }
   239             if (n == 0) {
   240                 /* We can't tell, default to host order */
   241                 cd->src_fmt = ENCODING_UTF16NATIVE;
   242             }
   243         }
   244         break;
   245     case ENCODING_UTF32:
   246         /* Scan for a byte order marker */
   247         {
   248             Uint8 *p = (Uint8 *) src;
   249             size_t n = srclen / 4;
   250             while (n) {
   251                 if (p[0] == 0xFF && p[1] == 0xFE &&
   252                     p[2] == 0x00 && p[3] == 0x00) {
   253                     cd->src_fmt = ENCODING_UTF32BE;
   254                     break;
   255                 } else if (p[0] == 0x00 && p[1] == 0x00 &&
   256                            p[2] == 0xFE && p[3] == 0xFF) {
   257                     cd->src_fmt = ENCODING_UTF32LE;
   258                     break;
   259                 }
   260                 p += 4;
   261                 --n;
   262             }
   263             if (n == 0) {
   264                 /* We can't tell, default to host order */
   265                 cd->src_fmt = ENCODING_UTF32NATIVE;
   266             }
   267         }
   268         break;
   269     }
   270 
   271     switch (cd->dst_fmt) {
   272     case ENCODING_UTF16:
   273         /* Default to host order, need to add byte order marker */
   274         if (dstlen < 2) {
   275             return SDL_ICONV_E2BIG;
   276         }
   277         *(Uint16 *) dst = UNICODE_BOM;
   278         dst += 2;
   279         dstlen -= 2;
   280         cd->dst_fmt = ENCODING_UTF16NATIVE;
   281         break;
   282     case ENCODING_UTF32:
   283         /* Default to host order, need to add byte order marker */
   284         if (dstlen < 4) {
   285             return SDL_ICONV_E2BIG;
   286         }
   287         *(Uint32 *) dst = UNICODE_BOM;
   288         dst += 4;
   289         dstlen -= 4;
   290         cd->dst_fmt = ENCODING_UTF32NATIVE;
   291         break;
   292     }
   293 
   294     total = 0;
   295     while (srclen > 0) {
   296         /* Decode a character */
   297         switch (cd->src_fmt) {
   298         case ENCODING_ASCII:
   299             {
   300                 Uint8 *p = (Uint8 *) src;
   301                 ch = (Uint32) (p[0] & 0x7F);
   302                 ++src;
   303                 --srclen;
   304             }
   305             break;
   306         case ENCODING_LATIN1:
   307             {
   308                 Uint8 *p = (Uint8 *) src;
   309                 ch = (Uint32) p[0];
   310                 ++src;
   311                 --srclen;
   312             }
   313             break;
   314         case ENCODING_UTF8:    /* RFC 3629 */
   315             {
   316                 Uint8 *p = (Uint8 *) src;
   317                 size_t left = 0;
   318                 SDL_bool overlong = SDL_FALSE;
   319                 if (p[0] >= 0xFC) {
   320                     if ((p[0] & 0xFE) != 0xFC) {
   321                         /* Skip illegal sequences
   322                            return SDL_ICONV_EILSEQ;
   323                          */
   324                         ch = UNKNOWN_UNICODE;
   325                     } else {
   326                         if (p[0] == 0xFC) {
   327                             overlong = SDL_TRUE;
   328                         }
   329                         ch = (Uint32) (p[0] & 0x01);
   330                         left = 5;
   331                     }
   332                 } else if (p[0] >= 0xF8) {
   333                     if ((p[0] & 0xFC) != 0xF8) {
   334                         /* Skip illegal sequences
   335                            return SDL_ICONV_EILSEQ;
   336                          */
   337                         ch = UNKNOWN_UNICODE;
   338                     } else {
   339                         if (p[0] == 0xF8) {
   340                             overlong = SDL_TRUE;
   341                         }
   342                         ch = (Uint32) (p[0] & 0x03);
   343                         left = 4;
   344                     }
   345                 } else if (p[0] >= 0xF0) {
   346                     if ((p[0] & 0xF8) != 0xF0) {
   347                         /* Skip illegal sequences
   348                            return SDL_ICONV_EILSEQ;
   349                          */
   350                         ch = UNKNOWN_UNICODE;
   351                     } else {
   352                         if (p[0] == 0xF0) {
   353                             overlong = SDL_TRUE;
   354                         }
   355                         ch = (Uint32) (p[0] & 0x07);
   356                         left = 3;
   357                     }
   358                 } else if (p[0] >= 0xE0) {
   359                     if ((p[0] & 0xF0) != 0xE0) {
   360                         /* Skip illegal sequences
   361                            return SDL_ICONV_EILSEQ;
   362                          */
   363                         ch = UNKNOWN_UNICODE;
   364                     } else {
   365                         if (p[0] == 0xE0) {
   366                             overlong = SDL_TRUE;
   367                         }
   368                         ch = (Uint32) (p[0] & 0x0F);
   369                         left = 2;
   370                     }
   371                 } else if (p[0] >= 0xC0) {
   372                     if ((p[0] & 0xE0) != 0xC0) {
   373                         /* Skip illegal sequences
   374                            return SDL_ICONV_EILSEQ;
   375                          */
   376                         ch = UNKNOWN_UNICODE;
   377                     } else {
   378                         if ((p[0] & 0xCE) == 0xC0) {
   379                             overlong = SDL_TRUE;
   380                         }
   381                         ch = (Uint32) (p[0] & 0x1F);
   382                         left = 1;
   383                     }
   384                 } else {
   385                     if ((p[0] & 0x80) != 0x00) {
   386                         /* Skip illegal sequences
   387                            return SDL_ICONV_EILSEQ;
   388                          */
   389                         ch = UNKNOWN_UNICODE;
   390                     } else {
   391                         ch = (Uint32) p[0];
   392                     }
   393                 }
   394                 ++src;
   395                 --srclen;
   396                 if (srclen < left) {
   397                     return SDL_ICONV_EINVAL;
   398                 }
   399                 while (left--) {
   400                     ++p;
   401                     if ((p[0] & 0xC0) != 0x80) {
   402                         /* Skip illegal sequences
   403                            return SDL_ICONV_EILSEQ;
   404                          */
   405                         ch = UNKNOWN_UNICODE;
   406                         break;
   407                     }
   408                     ch <<= 6;
   409                     ch |= (p[0] & 0x3F);
   410                     ++src;
   411                     --srclen;
   412                 }
   413                 if (overlong) {
   414                     /* Potential security risk
   415                        return SDL_ICONV_EILSEQ;
   416                      */
   417                     ch = UNKNOWN_UNICODE;
   418                 }
   419                 if ((ch >= 0xD800 && ch <= 0xDFFF) ||
   420                     (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
   421                     /* Skip illegal sequences
   422                        return SDL_ICONV_EILSEQ;
   423                      */
   424                     ch = UNKNOWN_UNICODE;
   425                 }
   426             }
   427             break;
   428         case ENCODING_UTF16BE: /* RFC 2781 */
   429             {
   430                 Uint8 *p = (Uint8 *) src;
   431                 Uint16 W1, W2;
   432                 if (srclen < 2) {
   433                     return SDL_ICONV_EINVAL;
   434                 }
   435                 W1 = ((Uint16) p[0] << 8) | (Uint16) p[1];
   436                 src += 2;
   437                 srclen -= 2;
   438                 if (W1 < 0xD800 || W1 > 0xDFFF) {
   439                     ch = (Uint32) W1;
   440                     break;
   441                 }
   442                 if (W1 > 0xDBFF) {
   443                     /* Skip illegal sequences
   444                        return SDL_ICONV_EILSEQ;
   445                      */
   446                     ch = UNKNOWN_UNICODE;
   447                     break;
   448                 }
   449                 if (srclen < 2) {
   450                     return SDL_ICONV_EINVAL;
   451                 }
   452                 p = (Uint8 *) src;
   453                 W2 = ((Uint16) p[0] << 8) | (Uint16) p[1];
   454                 src += 2;
   455                 srclen -= 2;
   456                 if (W2 < 0xDC00 || W2 > 0xDFFF) {
   457                     /* Skip illegal sequences
   458                        return SDL_ICONV_EILSEQ;
   459                      */
   460                     ch = UNKNOWN_UNICODE;
   461                     break;
   462                 }
   463                 ch = (((Uint32) (W1 & 0x3FF) << 10) |
   464                       (Uint32) (W2 & 0x3FF)) + 0x10000;
   465             }
   466             break;
   467         case ENCODING_UTF16LE: /* RFC 2781 */
   468             {
   469                 Uint8 *p = (Uint8 *) src;
   470                 Uint16 W1, W2;
   471                 if (srclen < 2) {
   472                     return SDL_ICONV_EINVAL;
   473                 }
   474                 W1 = ((Uint16) p[1] << 8) | (Uint16) p[0];
   475                 src += 2;
   476                 srclen -= 2;
   477                 if (W1 < 0xD800 || W1 > 0xDFFF) {
   478                     ch = (Uint32) W1;
   479                     break;
   480                 }
   481                 if (W1 > 0xDBFF) {
   482                     /* Skip illegal sequences
   483                        return SDL_ICONV_EILSEQ;
   484                      */
   485                     ch = UNKNOWN_UNICODE;
   486                     break;
   487                 }
   488                 if (srclen < 2) {
   489                     return SDL_ICONV_EINVAL;
   490                 }
   491                 p = (Uint8 *) src;
   492                 W2 = ((Uint16) p[1] << 8) | (Uint16) p[0];
   493                 src += 2;
   494                 srclen -= 2;
   495                 if (W2 < 0xDC00 || W2 > 0xDFFF) {
   496                     /* Skip illegal sequences
   497                        return SDL_ICONV_EILSEQ;
   498                      */
   499                     ch = UNKNOWN_UNICODE;
   500                     break;
   501                 }
   502                 ch = (((Uint32) (W1 & 0x3FF) << 10) |
   503                       (Uint32) (W2 & 0x3FF)) + 0x10000;
   504             }
   505             break;
   506         case ENCODING_UTF32BE:
   507             {
   508                 Uint8 *p = (Uint8 *) src;
   509                 if (srclen < 4) {
   510                     return SDL_ICONV_EINVAL;
   511                 }
   512                 ch = ((Uint32) p[0] << 24) |
   513                     ((Uint32) p[1] << 16) |
   514                     ((Uint32) p[2] << 8) | (Uint32) p[3];
   515                 src += 4;
   516                 srclen -= 4;
   517             }
   518             break;
   519         case ENCODING_UTF32LE:
   520             {
   521                 Uint8 *p = (Uint8 *) src;
   522                 if (srclen < 4) {
   523                     return SDL_ICONV_EINVAL;
   524                 }
   525                 ch = ((Uint32) p[3] << 24) |
   526                     ((Uint32) p[2] << 16) |
   527                     ((Uint32) p[1] << 8) | (Uint32) p[0];
   528                 src += 4;
   529                 srclen -= 4;
   530             }
   531             break;
   532         case ENCODING_UCS2:
   533             {
   534                 Uint16 *p = (Uint16 *) src;
   535                 if (srclen < 2) {
   536                     return SDL_ICONV_EINVAL;
   537                 }
   538                 ch = *p;
   539                 src += 2;
   540                 srclen -= 2;
   541             }
   542             break;
   543         case ENCODING_UCS4:
   544             {
   545                 Uint32 *p = (Uint32 *) src;
   546                 if (srclen < 4) {
   547                     return SDL_ICONV_EINVAL;
   548                 }
   549                 ch = *p;
   550                 src += 4;
   551                 srclen -= 4;
   552             }
   553             break;
   554         }
   555 
   556         /* Encode a character */
   557         switch (cd->dst_fmt) {
   558         case ENCODING_ASCII:
   559             {
   560                 Uint8 *p = (Uint8 *) dst;
   561                 if (dstlen < 1) {
   562                     return SDL_ICONV_E2BIG;
   563                 }
   564                 if (ch > 0x7F) {
   565                     *p = UNKNOWN_ASCII;
   566                 } else {
   567                     *p = (Uint8) ch;
   568                 }
   569                 ++dst;
   570                 --dstlen;
   571             }
   572             break;
   573         case ENCODING_LATIN1:
   574             {
   575                 Uint8 *p = (Uint8 *) dst;
   576                 if (dstlen < 1) {
   577                     return SDL_ICONV_E2BIG;
   578                 }
   579                 if (ch > 0xFF) {
   580                     *p = UNKNOWN_ASCII;
   581                 } else {
   582                     *p = (Uint8) ch;
   583                 }
   584                 ++dst;
   585                 --dstlen;
   586             }
   587             break;
   588         case ENCODING_UTF8:    /* RFC 3629 */
   589             {
   590                 Uint8 *p = (Uint8 *) dst;
   591                 if (ch > 0x10FFFF) {
   592                     ch = UNKNOWN_UNICODE;
   593                 }
   594                 if (ch <= 0x7F) {
   595                     if (dstlen < 1) {
   596                         return SDL_ICONV_E2BIG;
   597                     }
   598                     *p = (Uint8) ch;
   599                     ++dst;
   600                     --dstlen;
   601                 } else if (ch <= 0x7FF) {
   602                     if (dstlen < 2) {
   603                         return SDL_ICONV_E2BIG;
   604                     }
   605                     p[0] = 0xC0 | (Uint8) ((ch >> 6) & 0x1F);
   606                     p[1] = 0x80 | (Uint8) (ch & 0x3F);
   607                     dst += 2;
   608                     dstlen -= 2;
   609                 } else if (ch <= 0xFFFF) {
   610                     if (dstlen < 3) {
   611                         return SDL_ICONV_E2BIG;
   612                     }
   613                     p[0] = 0xE0 | (Uint8) ((ch >> 12) & 0x0F);
   614                     p[1] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   615                     p[2] = 0x80 | (Uint8) (ch & 0x3F);
   616                     dst += 3;
   617                     dstlen -= 3;
   618                 } else if (ch <= 0x1FFFFF) {
   619                     if (dstlen < 4) {
   620                         return SDL_ICONV_E2BIG;
   621                     }
   622                     p[0] = 0xF0 | (Uint8) ((ch >> 18) & 0x07);
   623                     p[1] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   624                     p[2] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   625                     p[3] = 0x80 | (Uint8) (ch & 0x3F);
   626                     dst += 4;
   627                     dstlen -= 4;
   628                 } else if (ch <= 0x3FFFFFF) {
   629                     if (dstlen < 5) {
   630                         return SDL_ICONV_E2BIG;
   631                     }
   632                     p[0] = 0xF8 | (Uint8) ((ch >> 24) & 0x03);
   633                     p[1] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
   634                     p[2] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   635                     p[3] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   636                     p[4] = 0x80 | (Uint8) (ch & 0x3F);
   637                     dst += 5;
   638                     dstlen -= 5;
   639                 } else {
   640                     if (dstlen < 6) {
   641                         return SDL_ICONV_E2BIG;
   642                     }
   643                     p[0] = 0xFC | (Uint8) ((ch >> 30) & 0x01);
   644                     p[1] = 0x80 | (Uint8) ((ch >> 24) & 0x3F);
   645                     p[2] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
   646                     p[3] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   647                     p[4] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   648                     p[5] = 0x80 | (Uint8) (ch & 0x3F);
   649                     dst += 6;
   650                     dstlen -= 6;
   651                 }
   652             }
   653             break;
   654         case ENCODING_UTF16BE: /* RFC 2781 */
   655             {
   656                 Uint8 *p = (Uint8 *) dst;
   657                 if (ch > 0x10FFFF) {
   658                     ch = UNKNOWN_UNICODE;
   659                 }
   660                 if (ch < 0x10000) {
   661                     if (dstlen < 2) {
   662                         return SDL_ICONV_E2BIG;
   663                     }
   664                     p[0] = (Uint8) (ch >> 8);
   665                     p[1] = (Uint8) ch;
   666                     dst += 2;
   667                     dstlen -= 2;
   668                 } else {
   669                     Uint16 W1, W2;
   670                     if (dstlen < 4) {
   671                         return SDL_ICONV_E2BIG;
   672                     }
   673                     ch = ch - 0x10000;
   674                     W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
   675                     W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
   676                     p[0] = (Uint8) (W1 >> 8);
   677                     p[1] = (Uint8) W1;
   678                     p[2] = (Uint8) (W2 >> 8);
   679                     p[3] = (Uint8) W2;
   680                     dst += 4;
   681                     dstlen -= 4;
   682                 }
   683             }
   684             break;
   685         case ENCODING_UTF16LE: /* RFC 2781 */
   686             {
   687                 Uint8 *p = (Uint8 *) dst;
   688                 if (ch > 0x10FFFF) {
   689                     ch = UNKNOWN_UNICODE;
   690                 }
   691                 if (ch < 0x10000) {
   692                     if (dstlen < 2) {
   693                         return SDL_ICONV_E2BIG;
   694                     }
   695                     p[1] = (Uint8) (ch >> 8);
   696                     p[0] = (Uint8) ch;
   697                     dst += 2;
   698                     dstlen -= 2;
   699                 } else {
   700                     Uint16 W1, W2;
   701                     if (dstlen < 4) {
   702                         return SDL_ICONV_E2BIG;
   703                     }
   704                     ch = ch - 0x10000;
   705                     W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
   706                     W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
   707                     p[1] = (Uint8) (W1 >> 8);
   708                     p[0] = (Uint8) W1;
   709                     p[3] = (Uint8) (W2 >> 8);
   710                     p[2] = (Uint8) W2;
   711                     dst += 4;
   712                     dstlen -= 4;
   713                 }
   714             }
   715             break;
   716         case ENCODING_UTF32BE:
   717             {
   718                 Uint8 *p = (Uint8 *) dst;
   719                 if (ch > 0x10FFFF) {
   720                     ch = UNKNOWN_UNICODE;
   721                 }
   722                 if (dstlen < 4) {
   723                     return SDL_ICONV_E2BIG;
   724                 }
   725                 p[0] = (Uint8) (ch >> 24);
   726                 p[1] = (Uint8) (ch >> 16);
   727                 p[2] = (Uint8) (ch >> 8);
   728                 p[3] = (Uint8) ch;
   729                 dst += 4;
   730                 dstlen -= 4;
   731             }
   732             break;
   733         case ENCODING_UTF32LE:
   734             {
   735                 Uint8 *p = (Uint8 *) dst;
   736                 if (ch > 0x10FFFF) {
   737                     ch = UNKNOWN_UNICODE;
   738                 }
   739                 if (dstlen < 4) {
   740                     return SDL_ICONV_E2BIG;
   741                 }
   742                 p[3] = (Uint8) (ch >> 24);
   743                 p[2] = (Uint8) (ch >> 16);
   744                 p[1] = (Uint8) (ch >> 8);
   745                 p[0] = (Uint8) ch;
   746                 dst += 4;
   747                 dstlen -= 4;
   748             }
   749             break;
   750         case ENCODING_UCS2:
   751             {
   752                 Uint16 *p = (Uint16 *) dst;
   753                 if (ch > 0xFFFF) {
   754                     ch = UNKNOWN_UNICODE;
   755                 }
   756                 if (dstlen < 2) {
   757                     return SDL_ICONV_E2BIG;
   758                 }
   759                 *p = (Uint16) ch;
   760                 dst += 2;
   761                 dstlen -= 2;
   762             }
   763             break;
   764         case ENCODING_UCS4:
   765             {
   766                 Uint32 *p = (Uint32 *) dst;
   767                 if (ch > 0x7FFFFFFF) {
   768                     ch = UNKNOWN_UNICODE;
   769                 }
   770                 if (dstlen < 4) {
   771                     return SDL_ICONV_E2BIG;
   772                 }
   773                 *p = ch;
   774                 dst += 4;
   775                 dstlen -= 4;
   776             }
   777             break;
   778         }
   779 
   780         /* Update state */
   781         *inbuf = src;
   782         *inbytesleft = srclen;
   783         *outbuf = dst;
   784         *outbytesleft = dstlen;
   785         ++total;
   786     }
   787     return total;
   788 }
   789 
   790 int
   791 SDL_iconv_close(SDL_iconv_t cd)
   792 {
   793     if (cd && cd != (SDL_iconv_t) - 1) {
   794         SDL_free(cd);
   795     }
   796     return 0;
   797 }
   798 
   799 #endif /* !HAVE_ICONV */
   800 
   #ifdef HAVE_ICONV
   /*
    * Pick a character-set name from the environment: the first of LC_ALL,
    * LC_CTYPE, LC_MESSAGES and LANG that is set, or "ASCII" when none is
    * set, the value is empty, or it is the plain "C" locale.
    *
    * The !HAVE_ICONV branch of this file defines its own getlocale(), so
    * this copy is compiled only when HAVE_ICONV is defined; leaving it
    * unguarded makes the built-in build fail with a redefinition error.
    */
   static const char *
   getlocale()
   {
       const char *lang;

       lang = SDL_getenv("LC_ALL");
       if (!lang) {
           lang = SDL_getenv("LC_CTYPE");
       }
       if (!lang) {
           lang = SDL_getenv("LC_MESSAGES");
       }
       if (!lang) {
           lang = SDL_getenv("LANG");
       }
       /* Treat an empty value like an unset one, matching the other copy */
       if (!lang || !*lang || SDL_strcmp(lang, "C") == 0) {
           lang = "ASCII";
       }
       return lang;
   }
   #endif /* HAVE_ICONV */
   821 
   822 char *
   823 SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf,
   824                  size_t inbytesleft)
   825 {
   826     SDL_iconv_t cd;
   827     char *string;
   828     size_t stringsize;
   829     char *outbuf;
   830     size_t outbytesleft;
   831     size_t retCode = 0;
   832 
   833     cd = SDL_iconv_open(tocode, fromcode);
   834     if (cd == (SDL_iconv_t) - 1) {
   835         /* See if we can recover here (fixes iconv on Solaris 11) */
   836         if (!tocode || !*tocode) {
   837             tocode = "UTF-8";
   838         }
   839         if (!fromcode || !*fromcode) {
   840             tocode = "UTF-8";
   841         }
   842         cd = SDL_iconv_open(tocode, fromcode);
   843     }
   844     if (cd == (SDL_iconv_t) - 1) {
   845         return NULL;
   846     }
   847 
   848     stringsize = inbytesleft > 4 ? inbytesleft : 4;
   849     string = SDL_malloc(stringsize);
   850     if (!string) {
   851         SDL_iconv_close(cd);
   852         return NULL;
   853     }
   854     outbuf = string;
   855     outbytesleft = stringsize;
   856     SDL_memset(outbuf, 0, 4);
   857 
   858     while (inbytesleft > 0) {
   859         retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
   860         switch (retCode) {
   861         case SDL_ICONV_E2BIG:
   862             {
   863                 char *oldstring = string;
   864                 stringsize *= 2;
   865                 string = SDL_realloc(string, stringsize);
   866                 if (!string) {
   867                     SDL_iconv_close(cd);
   868                     return NULL;
   869                 }
   870                 outbuf = string + (outbuf - oldstring);
   871                 outbytesleft = stringsize - (outbuf - string);
   872                 SDL_memset(outbuf, 0, 4);
   873             }
   874             break;
   875         case SDL_ICONV_EILSEQ:
   876             /* Try skipping some input data - not perfect, but... */
   877             ++inbuf;
   878             --inbytesleft;
   879             break;
   880         case SDL_ICONV_EINVAL:
   881         case SDL_ICONV_ERROR:
   882             /* We can't continue... */
   883             inbytesleft = 0;
   884             break;
   885         }
   886     }
   887     SDL_iconv_close(cd);
   888 
   889     return string;
   890 }
   891 
   892 /* vi: set ts=4 sw=4 expandtab: */