From 0fe99b939f5335661944a97448d7d30f5575d360 Mon Sep 17 00:00:00 2001 From: Sam Lantinga Date: Sun, 28 Oct 2012 13:03:45 -0700 Subject: [PATCH] It turns out that UCS2 and UCS4 are defined as big-endian encodings --- include/SDL_stdinc.h | 4 +- src/stdlib/SDL_iconv.c | 142 ++++++++++++++++++++++++----------------- 2 files changed, 85 insertions(+), 61 deletions(-) diff --git a/include/SDL_stdinc.h b/include/SDL_stdinc.h index 390b3ed8d..d5163436c 100644 --- a/include/SDL_stdinc.h +++ b/include/SDL_stdinc.h @@ -748,8 +748,8 @@ extern DECLSPEC char *SDLCALL SDL_iconv_string(const char *tocode, const char *inbuf, size_t inbytesleft); #define SDL_iconv_utf8_locale(S) SDL_iconv_string("", "UTF-8", S, SDL_strlen(S)+1) -#define SDL_iconv_utf8_ucs2(S) (Uint16 *)SDL_iconv_string("UCS-2", "UTF-8", S, SDL_strlen(S)+1) -#define SDL_iconv_utf8_ucs4(S) (Uint32 *)SDL_iconv_string("UCS-4", "UTF-8", S, SDL_strlen(S)+1) +#define SDL_iconv_utf8_ucs2(S) (Uint16 *)SDL_iconv_string("UCS-2-INTERNAL", "UTF-8", S, SDL_strlen(S)+1) +#define SDL_iconv_utf8_ucs4(S) (Uint32 *)SDL_iconv_string("UCS-4-INTERNAL", "UTF-8", S, SDL_strlen(S)+1) /* Ends C function definitions when using C++ */ #ifdef __cplusplus diff --git a/src/stdlib/SDL_iconv.c b/src/stdlib/SDL_iconv.c index e67be619b..89a80a2c7 100644 --- a/src/stdlib/SDL_iconv.c +++ b/src/stdlib/SDL_iconv.c @@ -87,15 +87,21 @@ enum ENCODING_UTF32, /* Needs byte order marker */ ENCODING_UTF32BE, ENCODING_UTF32LE, - ENCODING_UCS2, /* Native byte order assumed */ - ENCODING_UCS4, /* Native byte order assumed */ + ENCODING_UCS2BE, + ENCODING_UCS2LE, + ENCODING_UCS4BE, + ENCODING_UCS4LE, }; #if SDL_BYTEORDER == SDL_BIG_ENDIAN #define ENCODING_UTF16NATIVE ENCODING_UTF16BE #define ENCODING_UTF32NATIVE ENCODING_UTF32BE +#define ENCODING_UCS2NATIVE ENCODING_UCS2BE +#define ENCODING_UCS4NATIVE ENCODING_UCS4BE #else #define ENCODING_UTF16NATIVE ENCODING_UTF16LE #define ENCODING_UTF32NATIVE ENCODING_UTF32LE +#define ENCODING_UCS2NATIVE ENCODING_UCS2LE +#define ENCODING_UCS4NATIVE ENCODING_UCS4LE #endif struct _SDL_iconv_t @@ -128,10 +134,16 @@ static struct { "UTF-32BE", ENCODING_UTF32BE }, { "UTF32LE", ENCODING_UTF32LE }, { "UTF-32LE", ENCODING_UTF32LE }, - { "UCS2", ENCODING_UCS2 }, - { "UCS-2", ENCODING_UCS2 }, - { "UCS4", ENCODING_UCS4 }, - { "UCS-4", ENCODING_UCS4 }, + { "UCS2", ENCODING_UCS2BE }, + { "UCS-2", ENCODING_UCS2BE }, + { "UCS-2LE", ENCODING_UCS2LE }, + { "UCS-2BE", ENCODING_UCS2BE }, + { "UCS-2-INTERNAL", ENCODING_UCS2NATIVE }, + { "UCS4", ENCODING_UCS4BE }, + { "UCS-4", ENCODING_UCS4BE }, + { "UCS-4LE", ENCODING_UCS4LE }, + { "UCS-4BE", ENCODING_UCS4BE }, + { "UCS-4-INTERNAL", ENCODING_UCS4NATIVE }, /* *INDENT-ON* */ }; @@ -518,6 +530,29 @@ SDL_iconv(SDL_iconv_t cd, (Uint32) (W2 & 0x3FF)) + 0x10000; } break; + case ENCODING_UCS2LE: + { + Uint8 *p = (Uint8 *) src; + if (srclen < 2) { + return SDL_ICONV_EINVAL; + } + ch = ((Uint32) p[1] << 8) | (Uint32) p[0]; + src += 2; + srclen -= 2; + } + break; + case ENCODING_UCS2BE: + { + Uint8 *p = (Uint8 *) src; + if (srclen < 2) { + return SDL_ICONV_EINVAL; + } + ch = ((Uint32) p[0] << 8) | (Uint32) p[1]; + src += 2; + srclen -= 2; + } + break; + case ENCODING_UCS4BE: case ENCODING_UTF32BE: { Uint8 *p = (Uint8 *) src; @@ -531,6 +566,7 @@ SDL_iconv(SDL_iconv_t cd, srclen -= 4; } break; + case ENCODING_UCS4LE: case ENCODING_UTF32LE: { Uint8 *p = (Uint8 *) src; @@ -544,28 +580,6 @@ SDL_iconv(SDL_iconv_t cd, srclen -= 4; } break; - case ENCODING_UCS2: - { - Uint16 *p = (Uint16 *) src; - if (srclen < 2) { - return SDL_ICONV_EINVAL; - } - ch = *p; - src += 2; - srclen -= 2; - } - break; - case ENCODING_UCS4: - { - Uint32 *p = (Uint32 *) src; - if (srclen < 4) { - return SDL_ICONV_EINVAL; - } - ch = *p; - src += 4; - srclen -= 4; - } - break; } /* Encode a character */ @@ -728,64 +742,74 @@ SDL_iconv(SDL_iconv_t cd, } } break; - case ENCODING_UTF32BE: + case ENCODING_UCS2BE: { Uint8 *p = (Uint8 *) dst; - if (ch > 0x10FFFF) { + if (ch > 0xFFFF) { ch = UNKNOWN_UNICODE; } - if (dstlen < 4) { + if (dstlen < 2) { return SDL_ICONV_E2BIG; } - p[0] = (Uint8) (ch >> 24); - p[1] = (Uint8) (ch >> 16); - p[2] = (Uint8) (ch >> 8); - p[3] = (Uint8) ch; - dst += 4; - dstlen -= 4; + p[0] = (Uint8) (ch >> 8); + p[1] = (Uint8) ch; + dst += 2; + dstlen -= 2; } break; - case ENCODING_UTF32LE: + case ENCODING_UCS2LE: { Uint8 *p = (Uint8 *) dst; - if (ch > 0x10FFFF) { + if (ch > 0xFFFF) { ch = UNKNOWN_UNICODE; } - if (dstlen < 4) { + if (dstlen < 2) { return SDL_ICONV_E2BIG; } - p[3] = (Uint8) (ch >> 24); - p[2] = (Uint8) (ch >> 16); p[1] = (Uint8) (ch >> 8); p[0] = (Uint8) ch; - dst += 4; - dstlen -= 4; + dst += 2; + dstlen -= 2; } break; - case ENCODING_UCS2: + case ENCODING_UTF32BE: + if (ch > 0x10FFFF) { + ch = UNKNOWN_UNICODE; + } + case ENCODING_UCS4BE: + if (ch > 0x7FFFFFFF) { + ch = UNKNOWN_UNICODE; + } { - Uint16 *p = (Uint16 *) dst; - if (ch > 0xFFFF) { - ch = UNKNOWN_UNICODE; - } - if (dstlen < 2) { + Uint8 *p = (Uint8 *) dst; + if (dstlen < 4) { return SDL_ICONV_E2BIG; } - *p = (Uint16) ch; - dst += 2; - dstlen -= 2; + p[0] = (Uint8) (ch >> 24); + p[1] = (Uint8) (ch >> 16); + p[2] = (Uint8) (ch >> 8); + p[3] = (Uint8) ch; + dst += 4; + dstlen -= 4; } break; - case ENCODING_UCS4: + case ENCODING_UTF32LE: + if (ch > 0x10FFFF) { + ch = UNKNOWN_UNICODE; + } + case ENCODING_UCS4LE: + if (ch > 0x7FFFFFFF) { + ch = UNKNOWN_UNICODE; + } { - Uint32 *p = (Uint32 *) dst; - if (ch > 0x7FFFFFFF) { - ch = UNKNOWN_UNICODE; - } + Uint8 *p = (Uint8 *) dst; if (dstlen < 4) { return SDL_ICONV_E2BIG; } - *p = ch; + p[3] = (Uint8) (ch >> 24); + p[2] = (Uint8) (ch >> 16); + p[1] = (Uint8) (ch >> 8); + p[0] = (Uint8) ch; dst += 4; dstlen -= 4; }