src/stdlib/SDL_iconv.c
author Sam Lantinga <slouken@libsdl.org>
Wed, 04 Jul 2007 08:01:04 +0000
changeset 2143 e906da4414a3
parent 2142 ca80c942e69c
child 2182 cc2597da0840
permissions -rw-r--r--
Fix for bug #447 merged from SDL 1.2
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 /* This file contains portable iconv functions for SDL */
    25 
    26 #include "SDL_stdinc.h"
    27 #include "SDL_endian.h"
    28 
    29 #ifdef HAVE_ICONV
    30 
    31 /* Depending on which standard the iconv() was implemented with,
    32    iconv() may or may not use const char ** for the inbuf param.
    33    If we get this wrong, it's just a warning, so no big deal.
    34 */
    35 #if defined(_XGP6) || \
    36     defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2))
    37 #define ICONV_INBUF_NONCONST
    38 #endif
    39 
    40 #include <errno.h>
    41 
    42 size_t
    43 SDL_iconv(SDL_iconv_t cd,
    44           const char **inbuf, size_t * inbytesleft,
    45           char **outbuf, size_t * outbytesleft)
    46 {
    47     size_t retCode;
    48 #ifdef ICONV_INBUF_NONCONST
    49     retCode = iconv(cd, (char **) inbuf, inbytesleft, outbuf, outbytesleft);
    50 #else
    51     retCode = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
    52 #endif
    53     if (retCode == (size_t) - 1) {
    54         switch (errno) {
    55         case E2BIG:
    56             return SDL_ICONV_E2BIG;
    57         case EILSEQ:
    58             return SDL_ICONV_EILSEQ;
    59         case EINVAL:
    60             return SDL_ICONV_EINVAL;
    61         default:
    62             return SDL_ICONV_ERROR;
    63         }
    64     }
    65     return retCode;
    66 }
    67 
    68 #else
    69 
    70 /* Lots of useful information on Unicode at:
    71 	http://www.cl.cam.ac.uk/~mgk25/unicode.html
    72 */
    73 
    74 #define UNICODE_BOM	0xFEFF
    75 
    76 #define UNKNOWN_ASCII	'?'
    77 #define UNKNOWN_UNICODE	0xFFFD
    78 
    /*
     * Encodings understood by the built-in converter.  ENCODING_UNKNOWN is the
     * "no match" value used while looking up an encoding name.
     */
    enum
    {
        ENCODING_UNKNOWN,
        ENCODING_ASCII,
        ENCODING_LATIN1,
        ENCODING_UTF8,
        ENCODING_UTF16,             /* Needs byte order marker */
        ENCODING_UTF16BE,
        ENCODING_UTF16LE,
        ENCODING_UTF32,             /* Needs byte order marker */
        ENCODING_UTF32BE,
        ENCODING_UTF32LE,
        ENCODING_UCS2,              /* Native byte order assumed */
        ENCODING_UCS4               /* Native byte order assumed */
        /* No comma after the last enumerator: C89 does not allow one */
    };
    94 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
    95 #define ENCODING_UTF16NATIVE	ENCODING_UTF16BE
    96 #define ENCODING_UTF32NATIVE	ENCODING_UTF32BE
    97 #else
    98 #define ENCODING_UTF16NATIVE	ENCODING_UTF16LE
    99 #define ENCODING_UTF32NATIVE	ENCODING_UTF32LE
   100 #endif
   101 
   /*
    * Conversion descriptor for the built-in converter: the pair of ENCODING_*
    * values chosen by SDL_iconv_open().
    */
   struct _SDL_iconv_t
   {
       int src_fmt;                /* ENCODING_* value of the input text */
       int dst_fmt;                /* ENCODING_* value of the output text */
   };
   107 
   108 static struct
   109 {
   110     const char *name;
   111     int format;
   112 } encodings[] = {
   113 /* *INDENT-OFF* */
   114     { "646", ENCODING_ASCII },
   115     { "ASCII", ENCODING_ASCII },
   116     { "US-ASCII", ENCODING_ASCII },
   117     { "LATIN1", ENCODING_LATIN1 },
   118     { "8859-1", ENCODING_LATIN1 },
   119     { "ISO-8859-1", ENCODING_LATIN1 },
   120     { "UTF8", ENCODING_UTF8 },
   121     { "UTF-8", ENCODING_UTF8 },
   122     { "UTF16", ENCODING_UTF16 },
   123     { "UTF-16", ENCODING_UTF16 },
   124     { "UTF16BE", ENCODING_UTF16BE },
   125     { "UTF-16BE", ENCODING_UTF16BE },
   126     { "UTF16LE", ENCODING_UTF16LE },
   127     { "UTF-16LE", ENCODING_UTF16LE },
   128     { "UTF32", ENCODING_UTF32 },
   129     { "UTF-32", ENCODING_UTF32 },
   130     { "UTF32BE", ENCODING_UTF32BE },
   131     { "UTF-32BE", ENCODING_UTF32BE },
   132     { "UTF32LE", ENCODING_UTF32LE },
   133     { "UTF-32LE", ENCODING_UTF32LE },
   134     { "UCS2", ENCODING_UCS2 },
   135     { "UCS-2", ENCODING_UCS2 },
   136     { "UCS4", ENCODING_UCS4 },
   137     { "UCS-4", ENCODING_UCS4 },
   138 /* *INDENT-ON* */
   139 };
   140 
   141 SDL_iconv_t
   142 SDL_iconv_open(const char *tocode, const char *fromcode)
   143 {
   144     int src_fmt = ENCODING_UNKNOWN;
   145     int dst_fmt = ENCODING_UNKNOWN;
   146     int i;
   147 
   148     for (i = 0; i < SDL_arraysize(encodings); ++i) {
   149         if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) {
   150             src_fmt = encodings[i].format;
   151             if (dst_fmt != ENCODING_UNKNOWN) {
   152                 break;
   153             }
   154         }
   155         if (SDL_strcasecmp(tocode, encodings[i].name) == 0) {
   156             dst_fmt = encodings[i].format;
   157             if (src_fmt != ENCODING_UNKNOWN) {
   158                 break;
   159             }
   160         }
   161     }
   162     if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) {
   163         SDL_iconv_t cd = (SDL_iconv_t) SDL_malloc(sizeof(*cd));
   164         if (cd) {
   165             cd->src_fmt = src_fmt;
   166             cd->dst_fmt = dst_fmt;
   167             return cd;
   168         }
   169     }
   170     return (SDL_iconv_t) - 1;
   171 }
   172 
   173 size_t
   174 SDL_iconv(SDL_iconv_t cd,
   175           const char **inbuf, size_t * inbytesleft,
   176           char **outbuf, size_t * outbytesleft)
   177 {
   178     /* For simplicity, we'll convert everything to and from UCS-4 */
   179     const char *src;
   180     char *dst;
   181     size_t srclen, dstlen;
   182     Uint32 ch = 0;
   183     size_t total;
   184 
   185     if (!inbuf || !*inbuf) {
   186         /* Reset the context */
   187         return 0;
   188     }
   189     if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
   190         return SDL_ICONV_E2BIG;
   191     }
   192     src = *inbuf;
   193     srclen = (inbytesleft ? *inbytesleft : 0);
   194     dst = *outbuf;
   195     dstlen = *outbytesleft;
   196 
   197     switch (cd->src_fmt) {
   198     case ENCODING_UTF16:
   199         /* Scan for a byte order marker */
   200         {
   201             Uint8 *p = (Uint8 *) src;
   202             size_t n = srclen / 2;
   203             while (n) {
   204                 if (p[0] == 0xFF && p[1] == 0xFE) {
   205                     cd->src_fmt = ENCODING_UTF16BE;
   206                     break;
   207                 } else if (p[0] == 0xFE && p[1] == 0xFF) {
   208                     cd->src_fmt = ENCODING_UTF16LE;
   209                     break;
   210                 }
   211                 p += 2;
   212                 --n;
   213             }
   214             if (n == 0) {
   215                 /* We can't tell, default to host order */
   216                 cd->src_fmt = ENCODING_UTF16NATIVE;
   217             }
   218         }
   219         break;
   220     case ENCODING_UTF32:
   221         /* Scan for a byte order marker */
   222         {
   223             Uint8 *p = (Uint8 *) src;
   224             size_t n = srclen / 4;
   225             while (n) {
   226                 if (p[0] == 0xFF && p[1] == 0xFE &&
   227                     p[2] == 0x00 && p[3] == 0x00) {
   228                     cd->src_fmt = ENCODING_UTF32BE;
   229                     break;
   230                 } else if (p[0] == 0x00 && p[1] == 0x00 &&
   231                            p[2] == 0xFE && p[3] == 0xFF) {
   232                     cd->src_fmt = ENCODING_UTF32LE;
   233                     break;
   234                 }
   235                 p += 4;
   236                 --n;
   237             }
   238             if (n == 0) {
   239                 /* We can't tell, default to host order */
   240                 cd->src_fmt = ENCODING_UTF32NATIVE;
   241             }
   242         }
   243         break;
   244     }
   245 
   246     switch (cd->dst_fmt) {
   247     case ENCODING_UTF16:
   248         /* Default to host order, need to add byte order marker */
   249         if (dstlen < 2) {
   250             return SDL_ICONV_E2BIG;
   251         }
   252         *(Uint16 *) dst = UNICODE_BOM;
   253         dst += 2;
   254         dstlen -= 2;
   255         cd->dst_fmt = ENCODING_UTF16NATIVE;
   256         break;
   257     case ENCODING_UTF32:
   258         /* Default to host order, need to add byte order marker */
   259         if (dstlen < 4) {
   260             return SDL_ICONV_E2BIG;
   261         }
   262         *(Uint32 *) dst = UNICODE_BOM;
   263         dst += 4;
   264         dstlen -= 4;
   265         cd->dst_fmt = ENCODING_UTF32NATIVE;
   266         break;
   267     }
   268 
   269     total = 0;
   270     while (srclen > 0) {
   271         /* Decode a character */
   272         switch (cd->src_fmt) {
   273         case ENCODING_ASCII:
   274             {
   275                 Uint8 *p = (Uint8 *) src;
   276                 ch = (Uint32) (p[0] & 0x7F);
   277                 ++src;
   278                 --srclen;
   279             }
   280             break;
   281         case ENCODING_LATIN1:
   282             {
   283                 Uint8 *p = (Uint8 *) src;
   284                 ch = (Uint32) p[0];
   285                 ++src;
   286                 --srclen;
   287             }
   288             break;
   289         case ENCODING_UTF8:    /* RFC 3629 */
   290             {
   291                 Uint8 *p = (Uint8 *) src;
   292                 size_t left = 0;
   293                 SDL_bool overlong = SDL_FALSE;
   294                 if (p[0] >= 0xFC) {
   295                     if ((p[0] & 0xFE) != 0xFC) {
   296                         /* Skip illegal sequences
   297                            return SDL_ICONV_EILSEQ;
   298                          */
   299                         ch = UNKNOWN_UNICODE;
   300                     } else {
   301                         if (p[0] == 0xFC) {
   302                             overlong = SDL_TRUE;
   303                         }
   304                         ch = (Uint32) (p[0] & 0x01);
   305                         left = 5;
   306                     }
   307                 } else if (p[0] >= 0xF8) {
   308                     if ((p[0] & 0xFC) != 0xF8) {
   309                         /* Skip illegal sequences
   310                            return SDL_ICONV_EILSEQ;
   311                          */
   312                         ch = UNKNOWN_UNICODE;
   313                     } else {
   314                         if (p[0] == 0xF8) {
   315                             overlong = SDL_TRUE;
   316                         }
   317                         ch = (Uint32) (p[0] & 0x03);
   318                         left = 4;
   319                     }
   320                 } else if (p[0] >= 0xF0) {
   321                     if ((p[0] & 0xF8) != 0xF0) {
   322                         /* Skip illegal sequences
   323                            return SDL_ICONV_EILSEQ;
   324                          */
   325                         ch = UNKNOWN_UNICODE;
   326                     } else {
   327                         if (p[0] == 0xF0) {
   328                             overlong = SDL_TRUE;
   329                         }
   330                         ch = (Uint32) (p[0] & 0x07);
   331                         left = 3;
   332                     }
   333                 } else if (p[0] >= 0xE0) {
   334                     if ((p[0] & 0xF0) != 0xE0) {
   335                         /* Skip illegal sequences
   336                            return SDL_ICONV_EILSEQ;
   337                          */
   338                         ch = UNKNOWN_UNICODE;
   339                     } else {
   340                         if (p[0] == 0xE0) {
   341                             overlong = SDL_TRUE;
   342                         }
   343                         ch = (Uint32) (p[0] & 0x0F);
   344                         left = 2;
   345                     }
   346                 } else if (p[0] >= 0xC0) {
   347                     if ((p[0] & 0xE0) != 0xC0) {
   348                         /* Skip illegal sequences
   349                            return SDL_ICONV_EILSEQ;
   350                          */
   351                         ch = UNKNOWN_UNICODE;
   352                     } else {
   353                         if ((p[0] & 0xCE) == 0xC0) {
   354                             overlong = SDL_TRUE;
   355                         }
   356                         ch = (Uint32) (p[0] & 0x1F);
   357                         left = 1;
   358                     }
   359                 } else {
   360                     if ((p[0] & 0x80) != 0x00) {
   361                         /* Skip illegal sequences
   362                            return SDL_ICONV_EILSEQ;
   363                          */
   364                         ch = UNKNOWN_UNICODE;
   365                     } else {
   366                         ch = (Uint32) p[0];
   367                     }
   368                 }
   369                 ++src;
   370                 --srclen;
   371                 if (srclen < left) {
   372                     return SDL_ICONV_EINVAL;
   373                 }
   374                 while (left--) {
   375                     ++p;
   376                     if ((p[0] & 0xC0) != 0x80) {
   377                         /* Skip illegal sequences
   378                            return SDL_ICONV_EILSEQ;
   379                          */
   380                         ch = UNKNOWN_UNICODE;
   381                         break;
   382                     }
   383                     ch <<= 6;
   384                     ch |= (p[0] & 0x3F);
   385                     ++src;
   386                     --srclen;
   387                 }
   388                 if (overlong) {
   389                     /* Potential security risk
   390                        return SDL_ICONV_EILSEQ;
   391                      */
   392                     ch = UNKNOWN_UNICODE;
   393                 }
   394                 if ((ch >= 0xD800 && ch <= 0xDFFF) ||
   395                     (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
   396                     /* Skip illegal sequences
   397                        return SDL_ICONV_EILSEQ;
   398                      */
   399                     ch = UNKNOWN_UNICODE;
   400                 }
   401             }
   402             break;
   403         case ENCODING_UTF16BE: /* RFC 2781 */
   404             {
   405                 Uint8 *p = (Uint8 *) src;
   406                 Uint16 W1, W2;
   407                 if (srclen < 2) {
   408                     return SDL_ICONV_EINVAL;
   409                 }
   410                 W1 = ((Uint16) p[0] << 8) | (Uint16) p[1];
   411                 src += 2;
   412                 srclen -= 2;
   413                 if (W1 < 0xD800 || W1 > 0xDFFF) {
   414                     ch = (Uint32) W1;
   415                     break;
   416                 }
   417                 if (W1 > 0xDBFF) {
   418                     /* Skip illegal sequences
   419                        return SDL_ICONV_EILSEQ;
   420                      */
   421                     ch = UNKNOWN_UNICODE;
   422                     break;
   423                 }
   424                 if (srclen < 2) {
   425                     return SDL_ICONV_EINVAL;
   426                 }
   427                 p = (Uint8 *) src;
   428                 W2 = ((Uint16) p[0] << 8) | (Uint16) p[1];
   429                 src += 2;
   430                 srclen -= 2;
   431                 if (W2 < 0xDC00 || W2 > 0xDFFF) {
   432                     /* Skip illegal sequences
   433                        return SDL_ICONV_EILSEQ;
   434                      */
   435                     ch = UNKNOWN_UNICODE;
   436                     break;
   437                 }
   438                 ch = (((Uint32) (W1 & 0x3FF) << 10) |
   439                       (Uint32) (W2 & 0x3FF)) + 0x10000;
   440             }
   441             break;
   442         case ENCODING_UTF16LE: /* RFC 2781 */
   443             {
   444                 Uint8 *p = (Uint8 *) src;
   445                 Uint16 W1, W2;
   446                 if (srclen < 2) {
   447                     return SDL_ICONV_EINVAL;
   448                 }
   449                 W1 = ((Uint16) p[1] << 8) | (Uint16) p[0];
   450                 src += 2;
   451                 srclen -= 2;
   452                 if (W1 < 0xD800 || W1 > 0xDFFF) {
   453                     ch = (Uint32) W1;
   454                     break;
   455                 }
   456                 if (W1 > 0xDBFF) {
   457                     /* Skip illegal sequences
   458                        return SDL_ICONV_EILSEQ;
   459                      */
   460                     ch = UNKNOWN_UNICODE;
   461                     break;
   462                 }
   463                 if (srclen < 2) {
   464                     return SDL_ICONV_EINVAL;
   465                 }
   466                 p = (Uint8 *) src;
   467                 W2 = ((Uint16) p[1] << 8) | (Uint16) p[0];
   468                 src += 2;
   469                 srclen -= 2;
   470                 if (W2 < 0xDC00 || W2 > 0xDFFF) {
   471                     /* Skip illegal sequences
   472                        return SDL_ICONV_EILSEQ;
   473                      */
   474                     ch = UNKNOWN_UNICODE;
   475                     break;
   476                 }
   477                 ch = (((Uint32) (W1 & 0x3FF) << 10) |
   478                       (Uint32) (W2 & 0x3FF)) + 0x10000;
   479             }
   480             break;
   481         case ENCODING_UTF32BE:
   482             {
   483                 Uint8 *p = (Uint8 *) src;
   484                 if (srclen < 4) {
   485                     return SDL_ICONV_EINVAL;
   486                 }
   487                 ch = ((Uint32) p[0] << 24) |
   488                     ((Uint32) p[1] << 16) |
   489                     ((Uint32) p[2] << 8) | (Uint32) p[3];
   490                 src += 4;
   491                 srclen -= 4;
   492             }
   493             break;
   494         case ENCODING_UTF32LE:
   495             {
   496                 Uint8 *p = (Uint8 *) src;
   497                 if (srclen < 4) {
   498                     return SDL_ICONV_EINVAL;
   499                 }
   500                 ch = ((Uint32) p[3] << 24) |
   501                     ((Uint32) p[2] << 16) |
   502                     ((Uint32) p[1] << 8) | (Uint32) p[0];
   503                 src += 4;
   504                 srclen -= 4;
   505             }
   506             break;
   507         case ENCODING_UCS2:
   508             {
   509                 Uint16 *p = (Uint16 *) src;
   510                 if (srclen < 2) {
   511                     return SDL_ICONV_EINVAL;
   512                 }
   513                 ch = *p;
   514                 src += 2;
   515                 srclen -= 2;
   516             }
   517             break;
   518         case ENCODING_UCS4:
   519             {
   520                 Uint32 *p = (Uint32 *) src;
   521                 if (srclen < 4) {
   522                     return SDL_ICONV_EINVAL;
   523                 }
   524                 ch = *p;
   525                 src += 4;
   526                 srclen -= 4;
   527             }
   528             break;
   529         }
   530 
   531         /* Encode a character */
   532         switch (cd->dst_fmt) {
   533         case ENCODING_ASCII:
   534             {
   535                 Uint8 *p = (Uint8 *) dst;
   536                 if (dstlen < 1) {
   537                     return SDL_ICONV_E2BIG;
   538                 }
   539                 if (ch > 0x7F) {
   540                     *p = UNKNOWN_ASCII;
   541                 } else {
   542                     *p = (Uint8) ch;
   543                 }
   544                 ++dst;
   545                 --dstlen;
   546             }
   547             break;
   548         case ENCODING_LATIN1:
   549             {
   550                 Uint8 *p = (Uint8 *) dst;
   551                 if (dstlen < 1) {
   552                     return SDL_ICONV_E2BIG;
   553                 }
   554                 if (ch > 0xFF) {
   555                     *p = UNKNOWN_ASCII;
   556                 } else {
   557                     *p = (Uint8) ch;
   558                 }
   559                 ++dst;
   560                 --dstlen;
   561             }
   562             break;
   563         case ENCODING_UTF8:    /* RFC 3629 */
   564             {
   565                 Uint8 *p = (Uint8 *) dst;
   566                 if (ch > 0x10FFFF) {
   567                     ch = UNKNOWN_UNICODE;
   568                 }
   569                 if (ch <= 0x7F) {
   570                     if (dstlen < 1) {
   571                         return SDL_ICONV_E2BIG;
   572                     }
   573                     *p = (Uint8) ch;
   574                     ++dst;
   575                     --dstlen;
   576                 } else if (ch <= 0x7FF) {
   577                     if (dstlen < 2) {
   578                         return SDL_ICONV_E2BIG;
   579                     }
   580                     p[0] = 0xC0 | (Uint8) ((ch >> 6) & 0x1F);
   581                     p[1] = 0x80 | (Uint8) (ch & 0x3F);
   582                     dst += 2;
   583                     dstlen -= 2;
   584                 } else if (ch <= 0xFFFF) {
   585                     if (dstlen < 3) {
   586                         return SDL_ICONV_E2BIG;
   587                     }
   588                     p[0] = 0xE0 | (Uint8) ((ch >> 12) & 0x0F);
   589                     p[1] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   590                     p[2] = 0x80 | (Uint8) (ch & 0x3F);
   591                     dst += 3;
   592                     dstlen -= 3;
   593                 } else if (ch <= 0x1FFFFF) {
   594                     if (dstlen < 4) {
   595                         return SDL_ICONV_E2BIG;
   596                     }
   597                     p[0] = 0xF0 | (Uint8) ((ch >> 18) & 0x07);
   598                     p[1] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   599                     p[2] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   600                     p[3] = 0x80 | (Uint8) (ch & 0x3F);
   601                     dst += 4;
   602                     dstlen -= 4;
   603                 } else if (ch <= 0x3FFFFFF) {
   604                     if (dstlen < 5) {
   605                         return SDL_ICONV_E2BIG;
   606                     }
   607                     p[0] = 0xF8 | (Uint8) ((ch >> 24) & 0x03);
   608                     p[1] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
   609                     p[2] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   610                     p[3] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   611                     p[4] = 0x80 | (Uint8) (ch & 0x3F);
   612                     dst += 5;
   613                     dstlen -= 5;
   614                 } else {
   615                     if (dstlen < 6) {
   616                         return SDL_ICONV_E2BIG;
   617                     }
   618                     p[0] = 0xFC | (Uint8) ((ch >> 30) & 0x01);
   619                     p[1] = 0x80 | (Uint8) ((ch >> 24) & 0x3F);
   620                     p[2] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
   621                     p[3] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
   622                     p[4] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
   623                     p[5] = 0x80 | (Uint8) (ch & 0x3F);
   624                     dst += 6;
   625                     dstlen -= 6;
   626                 }
   627             }
   628             break;
   629         case ENCODING_UTF16BE: /* RFC 2781 */
   630             {
   631                 Uint8 *p = (Uint8 *) dst;
   632                 if (ch > 0x10FFFF) {
   633                     ch = UNKNOWN_UNICODE;
   634                 }
   635                 if (ch < 0x10000) {
   636                     if (dstlen < 2) {
   637                         return SDL_ICONV_E2BIG;
   638                     }
   639                     p[0] = (Uint8) (ch >> 8);
   640                     p[1] = (Uint8) ch;
   641                     dst += 2;
   642                     dstlen -= 2;
   643                 } else {
   644                     Uint16 W1, W2;
   645                     if (dstlen < 4) {
   646                         return SDL_ICONV_E2BIG;
   647                     }
   648                     ch = ch - 0x10000;
   649                     W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
   650                     W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
   651                     p[0] = (Uint8) (W1 >> 8);
   652                     p[1] = (Uint8) W1;
   653                     p[2] = (Uint8) (W2 >> 8);
   654                     p[3] = (Uint8) W2;
   655                     dst += 4;
   656                     dstlen -= 4;
   657                 }
   658             }
   659             break;
   660         case ENCODING_UTF16LE: /* RFC 2781 */
   661             {
   662                 Uint8 *p = (Uint8 *) dst;
   663                 if (ch > 0x10FFFF) {
   664                     ch = UNKNOWN_UNICODE;
   665                 }
   666                 if (ch < 0x10000) {
   667                     if (dstlen < 2) {
   668                         return SDL_ICONV_E2BIG;
   669                     }
   670                     p[1] = (Uint8) (ch >> 8);
   671                     p[0] = (Uint8) ch;
   672                     dst += 2;
   673                     dstlen -= 2;
   674                 } else {
   675                     Uint16 W1, W2;
   676                     if (dstlen < 4) {
   677                         return SDL_ICONV_E2BIG;
   678                     }
   679                     ch = ch - 0x10000;
   680                     W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
   681                     W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
   682                     p[1] = (Uint8) (W1 >> 8);
   683                     p[0] = (Uint8) W1;
   684                     p[3] = (Uint8) (W2 >> 8);
   685                     p[2] = (Uint8) W2;
   686                     dst += 4;
   687                     dstlen -= 4;
   688                 }
   689             }
   690             break;
   691         case ENCODING_UTF32BE:
   692             {
   693                 Uint8 *p = (Uint8 *) dst;
   694                 if (ch > 0x10FFFF) {
   695                     ch = UNKNOWN_UNICODE;
   696                 }
   697                 if (dstlen < 4) {
   698                     return SDL_ICONV_E2BIG;
   699                 }
   700                 p[0] = (Uint8) (ch >> 24);
   701                 p[1] = (Uint8) (ch >> 16);
   702                 p[2] = (Uint8) (ch >> 8);
   703                 p[3] = (Uint8) ch;
   704                 dst += 4;
   705                 dstlen -= 4;
   706             }
   707             break;
   708         case ENCODING_UTF32LE:
   709             {
   710                 Uint8 *p = (Uint8 *) dst;
   711                 if (ch > 0x10FFFF) {
   712                     ch = UNKNOWN_UNICODE;
   713                 }
   714                 if (dstlen < 4) {
   715                     return SDL_ICONV_E2BIG;
   716                 }
   717                 p[3] = (Uint8) (ch >> 24);
   718                 p[2] = (Uint8) (ch >> 16);
   719                 p[1] = (Uint8) (ch >> 8);
   720                 p[0] = (Uint8) ch;
   721                 dst += 4;
   722                 dstlen -= 4;
   723             }
   724             break;
   725         case ENCODING_UCS2:
   726             {
   727                 Uint16 *p = (Uint16 *) dst;
   728                 if (ch > 0xFFFF) {
   729                     ch = UNKNOWN_UNICODE;
   730                 }
   731                 if (dstlen < 2) {
   732                     return SDL_ICONV_E2BIG;
   733                 }
   734                 *p = (Uint16) ch;
   735                 dst += 2;
   736                 dstlen -= 2;
   737             }
   738             break;
   739         case ENCODING_UCS4:
   740             {
   741                 Uint32 *p = (Uint32 *) dst;
   742                 if (ch > 0x7FFFFFFF) {
   743                     ch = UNKNOWN_UNICODE;
   744                 }
   745                 if (dstlen < 4) {
   746                     return SDL_ICONV_E2BIG;
   747                 }
   748                 *p = ch;
   749                 dst += 4;
   750                 dstlen -= 4;
   751             }
   752             break;
   753         }
   754 
   755         /* Update state */
   756         *inbuf = src;
   757         *inbytesleft = srclen;
   758         *outbuf = dst;
   759         *outbytesleft = dstlen;
   760         ++total;
   761     }
   762     return total;
   763 }
   764 
   765 int
   766 SDL_iconv_close(SDL_iconv_t cd)
   767 {
   768     if (cd && cd != (SDL_iconv_t) - 1) {
   769         SDL_free(cd);
   770     }
   771     return 0;
   772 }
   773 
   774 #endif /* !HAVE_ICONV */
   775 
   /*
    * Work out the name of the character encoding selected by the environment.
    *
    * LC_ALL, LC_CTYPE, LC_MESSAGES and LANG are consulted in that order; the
    * first one that is set to a non-empty value wins.  "C", "POSIX" or no
    * value at all yields "ASCII".  For a value of the form
    * "language_TERRITORY.codeset" only the part after the '.' is returned,
    * since that is the part that can match an encoding name.
    *
    * The returned pointer is either a string literal or points into the
    * string returned by SDL_getenv(); it must not be freed.
    *
    * NOTE(review): a trailing "@modifier" is not removed, because that would
    * need storage for a trimmed copy.
    */
   static const char *
   getlocale(void)
   {
       static const char *const variables[] = {
           "LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LANG"
       };
       const char *lang = NULL;
       const char *codeset;
       size_t i;

       for (i = 0; i < SDL_arraysize(variables); ++i) {
           lang = SDL_getenv(variables[i]);
           if (lang && *lang) {
               break;
           }
           /* Unset or empty: fall through to the next variable */
           lang = NULL;
       }
       if (!lang || SDL_strcmp(lang, "C") == 0
           || SDL_strcmp(lang, "POSIX") == 0) {
           return "ASCII";
       }

       /* Skip the "language_TERRITORY." prefix when there is one */
       codeset = SDL_strchr(lang, '.');
       if (codeset && codeset[1]) {
           return codeset + 1;
       }
       return lang;
   }
   796 
   797 char *
   798 SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf,
   799                  size_t inbytesleft)
   800 {
   801     SDL_iconv_t cd;
   802     char *string;
   803     size_t stringsize;
   804     char *outbuf;
   805     size_t outbytesleft;
   806     size_t retCode = 0;
   807 
   808     if (!fromcode || !*fromcode) {
   809         fromcode = getlocale();
   810     }
   811     if (!tocode || !*tocode) {
   812         tocode = getlocale();
   813     }
   814     cd = SDL_iconv_open(tocode, fromcode);
   815     if (cd == (SDL_iconv_t) - 1) {
   816         return NULL;
   817     }
   818 
   819     stringsize = inbytesleft > 4 ? inbytesleft : 4;
   820     string = SDL_malloc(stringsize);
   821     if (!string) {
   822         SDL_iconv_close(cd);
   823         return NULL;
   824     }
   825     outbuf = string;
   826     outbytesleft = stringsize;
   827     SDL_memset(outbuf, 0, 4);
   828 
   829     while (inbytesleft > 0) {
   830         retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
   831         switch (retCode) {
   832         case SDL_ICONV_E2BIG:
   833             {
   834                 char *oldstring = string;
   835                 stringsize *= 2;
   836                 string = SDL_realloc(string, stringsize);
   837                 if (!string) {
   838                     SDL_iconv_close(cd);
   839                     return NULL;
   840                 }
   841                 outbuf = string + (outbuf - oldstring);
   842                 outbytesleft = stringsize - (outbuf - string);
   843                 SDL_memset(outbuf, 0, 4);
   844             }
   845             break;
   846         case SDL_ICONV_EILSEQ:
   847             /* Try skipping some input data - not perfect, but... */
   848             ++inbuf;
   849             --inbytesleft;
   850             break;
   851         case SDL_ICONV_EINVAL:
   852         case SDL_ICONV_ERROR:
   853             /* We can't continue... */
   854             inbytesleft = 0;
   855             break;
   856         }
   857     }
   858     SDL_iconv_close(cd);
   859 
   860     return string;
   861 }
   862 
   863 /* vi: set ts=4 sw=4 expandtab: */