It turns out that UCS2 and UCS4 are defined as big-endian encodings
author Sam Lantinga <slouken@libsdl.org>
Sun, 28 Oct 2012 13:03:45 -0700
changeset 6610 4032f8efdfe7
parent 6609 747e2ac35db6
child 6611 5c8b5b03ad8a
It turns out that UCS2 and UCS4 are defined as big-endian encodings
include/SDL_stdinc.h
src/stdlib/SDL_iconv.c
     1.1 --- a/include/SDL_stdinc.h	Sat Oct 27 02:54:10 2012 -0700
     1.2 +++ b/include/SDL_stdinc.h	Sun Oct 28 13:03:45 2012 -0700
     1.3 @@ -748,8 +748,8 @@
     1.4                                                 const char *inbuf,
     1.5                                                 size_t inbytesleft);
     1.6  #define SDL_iconv_utf8_locale(S)	SDL_iconv_string("", "UTF-8", S, SDL_strlen(S)+1)
     1.7 -#define SDL_iconv_utf8_ucs2(S)		(Uint16 *)SDL_iconv_string("UCS-2", "UTF-8", S, SDL_strlen(S)+1)
     1.8 -#define SDL_iconv_utf8_ucs4(S)		(Uint32 *)SDL_iconv_string("UCS-4", "UTF-8", S, SDL_strlen(S)+1)
     1.9 +#define SDL_iconv_utf8_ucs2(S)		(Uint16 *)SDL_iconv_string("UCS-2-INTERNAL", "UTF-8", S, SDL_strlen(S)+1)
    1.10 +#define SDL_iconv_utf8_ucs4(S)		(Uint32 *)SDL_iconv_string("UCS-4-INTERNAL", "UTF-8", S, SDL_strlen(S)+1)
    1.11  
    1.12  /* Ends C function definitions when using C++ */
    1.13  #ifdef __cplusplus
     2.1 --- a/src/stdlib/SDL_iconv.c	Sat Oct 27 02:54:10 2012 -0700
     2.2 +++ b/src/stdlib/SDL_iconv.c	Sun Oct 28 13:03:45 2012 -0700
     2.3 @@ -87,15 +87,21 @@
     2.4      ENCODING_UTF32,             /* Needs byte order marker */
     2.5      ENCODING_UTF32BE,
     2.6      ENCODING_UTF32LE,
     2.7 -    ENCODING_UCS2,              /* Native byte order assumed */
     2.8 -    ENCODING_UCS4,              /* Native byte order assumed */
     2.9 +    ENCODING_UCS2BE,
    2.10 +    ENCODING_UCS2LE,
    2.11 +    ENCODING_UCS4BE,
    2.12 +    ENCODING_UCS4LE,
    2.13  };
    2.14  #if SDL_BYTEORDER == SDL_BIG_ENDIAN
    2.15  #define ENCODING_UTF16NATIVE	ENCODING_UTF16BE
    2.16  #define ENCODING_UTF32NATIVE	ENCODING_UTF32BE
    2.17 +#define ENCODING_UCS2NATIVE     ENCODING_UCS2BE
    2.18 +#define ENCODING_UCS4NATIVE     ENCODING_UCS4BE
    2.19  #else
    2.20  #define ENCODING_UTF16NATIVE	ENCODING_UTF16LE
    2.21  #define ENCODING_UTF32NATIVE	ENCODING_UTF32LE
    2.22 +#define ENCODING_UCS2NATIVE     ENCODING_UCS2LE
    2.23 +#define ENCODING_UCS4NATIVE     ENCODING_UCS4LE
    2.24  #endif
    2.25  
    2.26  struct _SDL_iconv_t
    2.27 @@ -128,10 +134,16 @@
    2.28      { "UTF-32BE", ENCODING_UTF32BE },
    2.29      { "UTF32LE", ENCODING_UTF32LE },
    2.30      { "UTF-32LE", ENCODING_UTF32LE },
    2.31 -    { "UCS2", ENCODING_UCS2 },
    2.32 -    { "UCS-2", ENCODING_UCS2 },
    2.33 -    { "UCS4", ENCODING_UCS4 },
    2.34 -    { "UCS-4", ENCODING_UCS4 },
    2.35 +    { "UCS2", ENCODING_UCS2BE },
    2.36 +    { "UCS-2", ENCODING_UCS2BE },
    2.37 +    { "UCS-2LE", ENCODING_UCS2LE },
    2.38 +    { "UCS-2BE", ENCODING_UCS2BE },
    2.39 +    { "UCS-2-INTERNAL", ENCODING_UCS2NATIVE },
    2.40 +    { "UCS4", ENCODING_UCS4BE },
    2.41 +    { "UCS-4", ENCODING_UCS4BE },
    2.42 +    { "UCS-4LE", ENCODING_UCS4LE },
    2.43 +    { "UCS-4BE", ENCODING_UCS4BE },
    2.44 +    { "UCS-4-INTERNAL", ENCODING_UCS4NATIVE },
    2.45  /* *INDENT-ON* */
    2.46  };
    2.47  
    2.48 @@ -518,6 +530,29 @@
    2.49                        (Uint32) (W2 & 0x3FF)) + 0x10000;
    2.50              }
    2.51              break;
    2.52 +        case ENCODING_UCS2LE:
    2.53 +            {
    2.54 +                Uint8 *p = (Uint8 *) src;
    2.55 +                if (srclen < 2) {
    2.56 +                    return SDL_ICONV_EINVAL;
    2.57 +                }
    2.58 +                ch = ((Uint32) p[1] << 8) | (Uint32) p[0];
    2.59 +                src += 2;
    2.60 +                srclen -= 2;
    2.61 +            }
    2.62 +            break;
    2.63 +        case ENCODING_UCS2BE:
    2.64 +            {
    2.65 +                Uint8 *p = (Uint8 *) src;
    2.66 +                if (srclen < 2) {
    2.67 +                    return SDL_ICONV_EINVAL;
    2.68 +                }
    2.69 +                ch = ((Uint32) p[0] << 8) | (Uint32) p[1];
    2.70 +                src += 2;
    2.71 +                srclen -= 2;
    2.72 +            }
    2.73 +            break;
    2.74 +        case ENCODING_UCS4BE:
    2.75          case ENCODING_UTF32BE:
    2.76              {
    2.77                  Uint8 *p = (Uint8 *) src;
    2.78 @@ -531,6 +566,7 @@
    2.79                  srclen -= 4;
    2.80              }
    2.81              break;
    2.82 +        case ENCODING_UCS4LE:
    2.83          case ENCODING_UTF32LE:
    2.84              {
    2.85                  Uint8 *p = (Uint8 *) src;
    2.86 @@ -544,28 +580,6 @@
    2.87                  srclen -= 4;
    2.88              }
    2.89              break;
    2.90 -        case ENCODING_UCS2:
    2.91 -            {
    2.92 -                Uint16 *p = (Uint16 *) src;
    2.93 -                if (srclen < 2) {
    2.94 -                    return SDL_ICONV_EINVAL;
    2.95 -                }
    2.96 -                ch = *p;
    2.97 -                src += 2;
    2.98 -                srclen -= 2;
    2.99 -            }
   2.100 -            break;
   2.101 -        case ENCODING_UCS4:
   2.102 -            {
   2.103 -                Uint32 *p = (Uint32 *) src;
   2.104 -                if (srclen < 4) {
   2.105 -                    return SDL_ICONV_EINVAL;
   2.106 -                }
   2.107 -                ch = *p;
   2.108 -                src += 4;
   2.109 -                srclen -= 4;
   2.110 -            }
   2.111 -            break;
   2.112          }
   2.113  
   2.114          /* Encode a character */
   2.115 @@ -728,12 +742,46 @@
   2.116                  }
   2.117              }
   2.118              break;
   2.119 -        case ENCODING_UTF32BE:
   2.120 +        case ENCODING_UCS2BE:
   2.121              {
   2.122                  Uint8 *p = (Uint8 *) dst;
   2.123 -                if (ch > 0x10FFFF) {
   2.124 +                if (ch > 0xFFFF) {
   2.125                      ch = UNKNOWN_UNICODE;
   2.126                  }
   2.127 +                if (dstlen < 2) {
   2.128 +                    return SDL_ICONV_E2BIG;
   2.129 +                }
   2.130 +                p[0] = (Uint8) (ch >> 8);
   2.131 +                p[1] = (Uint8) ch;
   2.132 +                dst += 2;
   2.133 +                dstlen -= 2;
   2.134 +            }
   2.135 +            break;
   2.136 +        case ENCODING_UCS2LE:
   2.137 +            {
   2.138 +                Uint8 *p = (Uint8 *) dst;
   2.139 +                if (ch > 0xFFFF) {
   2.140 +                    ch = UNKNOWN_UNICODE;
   2.141 +                }
   2.142 +                if (dstlen < 2) {
   2.143 +                    return SDL_ICONV_E2BIG;
   2.144 +                }
   2.145 +                p[1] = (Uint8) (ch >> 8);
   2.146 +                p[0] = (Uint8) ch;
   2.147 +                dst += 2;
   2.148 +                dstlen -= 2;
   2.149 +            }
   2.150 +            break;
   2.151 +        case ENCODING_UTF32BE:
   2.152 +            if (ch > 0x10FFFF) {
   2.153 +                ch = UNKNOWN_UNICODE;
   2.154 +            }
   2.155 +        case ENCODING_UCS4BE:
   2.156 +            if (ch > 0x7FFFFFFF) {
   2.157 +                ch = UNKNOWN_UNICODE;
   2.158 +            }
   2.159 +            {
   2.160 +                Uint8 *p = (Uint8 *) dst;
   2.161                  if (dstlen < 4) {
   2.162                      return SDL_ICONV_E2BIG;
   2.163                  }
   2.164 @@ -746,11 +794,15 @@
   2.165              }
   2.166              break;
   2.167          case ENCODING_UTF32LE:
   2.168 +            if (ch > 0x10FFFF) {
   2.169 +                ch = UNKNOWN_UNICODE;
   2.170 +            }
   2.171 +        case ENCODING_UCS4LE:
   2.172 +            if (ch > 0x7FFFFFFF) {
   2.173 +                ch = UNKNOWN_UNICODE;
   2.174 +            }
   2.175              {
   2.176                  Uint8 *p = (Uint8 *) dst;
   2.177 -                if (ch > 0x10FFFF) {
   2.178 -                    ch = UNKNOWN_UNICODE;
   2.179 -                }
   2.180                  if (dstlen < 4) {
   2.181                      return SDL_ICONV_E2BIG;
   2.182                  }
   2.183 @@ -762,34 +814,6 @@
   2.184                  dstlen -= 4;
   2.185              }
   2.186              break;
   2.187 -        case ENCODING_UCS2:
   2.188 -            {
   2.189 -                Uint16 *p = (Uint16 *) dst;
   2.190 -                if (ch > 0xFFFF) {
   2.191 -                    ch = UNKNOWN_UNICODE;
   2.192 -                }
   2.193 -                if (dstlen < 2) {
   2.194 -                    return SDL_ICONV_E2BIG;
   2.195 -                }
   2.196 -                *p = (Uint16) ch;
   2.197 -                dst += 2;
   2.198 -                dstlen -= 2;
   2.199 -            }
   2.200 -            break;
   2.201 -        case ENCODING_UCS4:
   2.202 -            {
   2.203 -                Uint32 *p = (Uint32 *) dst;
   2.204 -                if (ch > 0x7FFFFFFF) {
   2.205 -                    ch = UNKNOWN_UNICODE;
   2.206 -                }
   2.207 -                if (dstlen < 4) {
   2.208 -                    return SDL_ICONV_E2BIG;
   2.209 -                }
   2.210 -                *p = ch;
   2.211 -                dst += 4;
   2.212 -                dstlen -= 4;
   2.213 -            }
   2.214 -            break;
   2.215          }
   2.216  
   2.217          /* Update state */