From 10d16a3e86e9af84b09db3ff521c720d468354ec Mon Sep 17 00:00:00 2001 From: Sam Lantinga Date: Mon, 13 Mar 2006 01:08:00 +0000 Subject: [PATCH] Added UTF-8 <-> UTF-16 <-> UTF-32 <-> UCS-2 <-> UCS-4 conversion capability --- configure.in | 4 +- include/SDL_config.h.in | 3 + include/SDL_stdinc.h | 35 ++ src/stdlib/SDL_iconv.c | 809 ++++++++++++++++++++++++++++++++++++++++ src/stdlib/SDL_string.c | 22 +- test/Makefile.in | 5 +- test/testiconv.c | 85 +++++ test/utf8.txt | Bin 0 -> 20334 bytes 8 files changed, 958 insertions(+), 5 deletions(-) create mode 100644 src/stdlib/SDL_iconv.c create mode 100644 test/testiconv.c create mode 100644 test/utf8.txt diff --git a/configure.in b/configure.in index f5182f34b..65ae93818 100644 --- a/configure.in +++ b/configure.in @@ -94,7 +94,7 @@ if test x$enable_libc = xyes; then dnl Check for C library headers AC_HEADER_STDC - AC_CHECK_HEADERS(sys/types.h stdio.h stdlib.h stddef.h stdarg.h malloc.h memory.h string.h strings.h inttypes.h stdint.h ctype.h math.h signal.h) + AC_CHECK_HEADERS(sys/types.h stdio.h stdlib.h stddef.h stdarg.h malloc.h memory.h string.h strings.h inttypes.h stdint.h ctype.h math.h iconv.h signal.h) dnl Check for typedefs, structures, etc. 
AC_TYPE_SIZE_T @@ -116,7 +116,7 @@ if test x$enable_libc = xyes; then if test x$ac_cv_func_strtod = xyes; then AC_DEFINE(HAVE_STRTOD) fi - AC_CHECK_FUNCS(malloc calloc realloc free getenv putenv unsetenv qsort abs bcopy memset memcpy memmove strlen strlcpy strlcat strdup _strrev _strupr _strlwr strchr strrchr strstr itoa _ltoa _uitoa _ultoa strtol strtoul _i64toa _ui64toa strtoll strtoull atoi atof strcmp strncmp stricmp strcasecmp sscanf snprintf vsnprintf sigaction setjmp nanosleep) + AC_CHECK_FUNCS(malloc calloc realloc free getenv putenv unsetenv qsort abs bcopy memset memcpy memmove strlen strlcpy strlcat strdup _strrev _strupr _strlwr strchr strrchr strstr itoa _ltoa _uitoa _ultoa strtol strtoul _i64toa _ui64toa strtoll strtoull atoi atof strcmp strncmp stricmp strcasecmp strncasecmp sscanf snprintf vsnprintf iconv sigaction setjmp nanosleep) AC_CHECK_LIB(m, pow, [BUILD_LDFLAGS="$BUILD_LDFLAGS -lm"]) fi diff --git a/include/SDL_config.h.in b/include/SDL_config.h.in index 901cd915f..40e4fa88e 100644 --- a/include/SDL_config.h.in +++ b/include/SDL_config.h.in @@ -68,6 +68,7 @@ #undef HAVE_STDINT_H #undef HAVE_CTYPE_H #undef HAVE_MATH_H +#undef HAVE_ICONV_H #undef HAVE_SIGNAL_H #undef HAVE_ALTIVEC_H @@ -118,9 +119,11 @@ #undef HAVE_STRNCMP #undef HAVE_STRICMP #undef HAVE_STRCASECMP +#undef HAVE_STRNCASECMP #undef HAVE_SSCANF #undef HAVE_SNPRINTF #undef HAVE_VSNPRINTF +#undef HAVE_ICONV #undef HAVE_SIGACTION #undef HAVE_SETJMP #undef HAVE_NANOSLEEP diff --git a/include/SDL_stdinc.h b/include/SDL_stdinc.h index 557bf38a4..5a24eb7b3 100644 --- a/include/SDL_stdinc.h +++ b/include/SDL_stdinc.h @@ -70,6 +70,9 @@ #if HAVE_CTYPE_H # include <ctype.h> #endif +#if HAVE_ICONV_H +# include <iconv.h> +#endif /* The number of elements in an array */ #define SDL_arraysize(array) (sizeof(array)/sizeof(array[0])) @@ -518,6 +521,12 @@ extern DECLSPEC int SDLCALL SDL_strncmp(const char *str1, const char *str2, size extern DECLSPEC int SDLCALL SDL_strcasecmp(const char *str1, const char *str2); 
#endif +#if HAVE_STRNCASECMP +#define SDL_strncasecmp strncasecmp +#else +extern DECLSPEC int SDLCALL SDL_strncasecmp(const char *str1, const char *str2, size_t maxlen); +#endif + #if HAVE_SSCANF #define SDL_sscanf sscanf #else @@ -536,6 +545,32 @@ extern DECLSPEC int SDLCALL SDL_snprintf(char *text, size_t maxlen, const char * extern DECLSPEC int SDLCALL SDL_vsnprintf(char *text, size_t maxlen, const char *fmt, va_list ap); #endif +/* The SDL implementation of iconv() returns these error codes */ +#define SDL_ICONV_ERROR (size_t)-1 +#define SDL_ICONV_E2BIG (size_t)-2 +#define SDL_ICONV_EILSEQ (size_t)-3 +#define SDL_ICONV_EINVAL (size_t)-4 + +#if HAVE_ICONV +#define SDL_iconv_t iconv_t +#define SDL_iconv_open iconv_open +#define SDL_iconv_close iconv_close +extern DECLSPEC size_t SDLCALL SDL_iconv(SDL_iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); +#else +typedef struct _SDL_iconv_t *SDL_iconv_t; +extern DECLSPEC SDL_iconv_t SDLCALL SDL_iconv_open(const char *tocode, const char *fromcode); +extern DECLSPEC int SDLCALL SDL_iconv_close(SDL_iconv_t cd); +extern DECLSPEC size_t SDLCALL SDL_iconv(SDL_iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); +#endif +/* This function converts a string between encodings in one pass, returning a + string that must be freed with SDL_free() or NULL on error. 
+*/
+extern DECLSPEC char * SDLCALL SDL_iconv_string(const char *tocode, const char *fromcode, char *inbuf, size_t inbytesleft);
+#define SDL_iconv_utf8_ascii(S) SDL_iconv_string("ASCII", "UTF-8", S, SDL_strlen(S)+1)
+#define SDL_iconv_utf8_latin1(S) SDL_iconv_string("LATIN1", "UTF-8", S, SDL_strlen(S)+1)
+#define SDL_iconv_utf8_ucs2(S) (Uint16 *)SDL_iconv_string("UCS-2", "UTF-8", S, SDL_strlen(S)+1)
+#define SDL_iconv_utf8_ucs4(S) (Uint32 *)SDL_iconv_string("UCS-4", "UTF-8", S, SDL_strlen(S)+1)
+
 /* Ends C function definitions when using C++ */
 #ifdef __cplusplus
 }
diff --git a/src/stdlib/SDL_iconv.c b/src/stdlib/SDL_iconv.c
new file mode 100644
index 000000000..517f7b791
--- /dev/null
+++ b/src/stdlib/SDL_iconv.c
@@ -0,0 +1,837 @@
+/*
+    SDL - Simple DirectMedia Layer
+    Copyright (C) 1997-2006 Sam Lantinga
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+    Sam Lantinga
+    slouken@libsdl.org
+*/
+#include "SDL_config.h"
+
+/* This file contains portable iconv functions for SDL */
+
+#include "SDL_stdinc.h"
+#include "SDL_endian.h"
+
+#ifdef HAVE_ICONV
+
+#include <errno.h>
+
+/* Thin wrapper over the system iconv(): E2BIG, EILSEQ and EINVAL in errno
+   map to the matching SDL_ICONV_* code, anything else to SDL_ICONV_ERROR. */
+size_t SDL_iconv(SDL_iconv_t cd,
+                 char **inbuf, size_t *inbytesleft,
+                 char **outbuf, size_t *outbytesleft)
+{
+	size_t retCode = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
+	if ( retCode == (size_t)-1 ) {
+		switch(errno) {
+		    case E2BIG:
+			return SDL_ICONV_E2BIG;
+		    case EILSEQ:
+			return SDL_ICONV_EILSEQ;
+		    case EINVAL:
+			return SDL_ICONV_EINVAL;
+		    default:
+			return SDL_ICONV_ERROR;
+		}
+	}
+	return retCode;
+}
+
+#else
+
+#define UNICODE_BOM	0xFEFF
+
+#define UNKNOWN_ASCII	'?'
+#define UNKNOWN_UNICODE	0xFFFD
+
+enum {
+	ENCODING_UNKNOWN,
+	ENCODING_ASCII,
+	ENCODING_LATIN1,
+	ENCODING_UTF8,
+	ENCODING_UTF16,		/* Needs byte order marker */
+	ENCODING_UTF16BE,
+	ENCODING_UTF16LE,
+	ENCODING_UTF32,		/* Needs byte order marker */
+	ENCODING_UTF32BE,
+	ENCODING_UTF32LE,
+	ENCODING_UCS2,		/* Native byte order assumed */
+	ENCODING_UCS4,		/* Native byte order assumed */
+};
+#if SDL_BYTEORDER == SDL_BIG_ENDIAN
+#define ENCODING_UTF16NATIVE	ENCODING_UTF16BE
+#define ENCODING_UTF32NATIVE	ENCODING_UTF32BE
+#else
+#define ENCODING_UTF16NATIVE	ENCODING_UTF16LE
+#define ENCODING_UTF32NATIVE	ENCODING_UTF32LE
+#endif
+
+struct _SDL_iconv_t
+{
+	int src_fmt;
+	int dst_fmt;
+};
+
+/* Encoding names recognized by SDL_iconv_open() (matched case-insensitively) */
+static struct {
+	const char *name;
+	int format;
+} encodings[] = {
+	{ "ASCII",	ENCODING_ASCII },
+	{ "US-ASCII",	ENCODING_ASCII },
+	{ "LATIN1",	ENCODING_LATIN1 },
+	{ "ISO-8859-1",	ENCODING_LATIN1 },
+	{ "UTF8",	ENCODING_UTF8 },
+	{ "UTF-8",	ENCODING_UTF8 },
+	{ "UTF16",	ENCODING_UTF16 },
+	{ "UTF-16",	ENCODING_UTF16 },
+	{ "UTF16BE",	ENCODING_UTF16BE },
+	{ "UTF-16BE",	ENCODING_UTF16BE },
+	{ "UTF16LE",	ENCODING_UTF16LE },
+	{ "UTF-16LE",	ENCODING_UTF16LE },
+	{ "UTF32",	ENCODING_UTF32 },
+	{ "UTF-32",	ENCODING_UTF32 },
+	{ "UTF32BE",	ENCODING_UTF32BE },
+	{ "UTF-32BE",	ENCODING_UTF32BE },
+	{ "UTF32LE",	ENCODING_UTF32LE },
+	{ "UTF-32LE",	ENCODING_UTF32LE },
+	{ "UCS2",	ENCODING_UCS2 },
+	{ "UCS-2",	ENCODING_UCS2 },
+	{ "UCS4",	ENCODING_UCS4 },
+	{ "UCS-4",	ENCODING_UCS4 },
+};
+
+/* Look up both encoding names and allocate a conversion descriptor.
+   Returns (SDL_iconv_t)-1 if either name is unknown or allocation fails. */
+SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
+{
+	int src_fmt = ENCODING_UNKNOWN;
+	int dst_fmt = ENCODING_UNKNOWN;
+	int i;
+
+	for ( i = 0; i < SDL_arraysize(encodings); ++i ) {
+		if ( SDL_strcasecmp(fromcode, encodings[i].name) == 0 ) {
+			src_fmt = encodings[i].format;
+			if ( dst_fmt != ENCODING_UNKNOWN ) {
+				break;
+			}
+		}
+		if ( SDL_strcasecmp(tocode, encodings[i].name) == 0 ) {
+			dst_fmt = encodings[i].format;
+			if ( src_fmt != ENCODING_UNKNOWN ) {
+				break;
+			}
+		}
+	}
+	if ( src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN ) {
+		SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd));
+		if ( cd ) {
+			cd->src_fmt = src_fmt;
+			cd->dst_fmt = dst_fmt;
+			return cd;
+		}
+	}
+	return (SDL_iconv_t)-1;
+}
+
+/* Portable converter: decodes one character at a time from *inbuf and
+   re-encodes it into *outbuf, committing the updated pointers and counts
+   after every character.  Returns the number of characters converted, or
+   an SDL_ICONV_* code as soon as input is truncated or output is full. */
+size_t SDL_iconv(SDL_iconv_t cd,
+                 char **inbuf, size_t *inbytesleft,
+                 char **outbuf, size_t *outbytesleft)
+{
+	/* For simplicity, we'll convert everything to and from UCS-4 */
+	char *src, *dst;
+	size_t srclen, dstlen;
+	Uint32 ch;
+	size_t total;
+
+	if ( !inbuf || !*inbuf ) {
+		/* Reset the context */
+		return 0;
+	}
+	if ( !outbuf || !*outbuf || !outbytesleft || !*outbytesleft ) {
+		return SDL_ICONV_E2BIG;
+	}
+	src = *inbuf;
+	srclen = (inbytesleft ? *inbytesleft : 0);
+	dst = *outbuf;
+	dstlen = *outbytesleft;
+
+	switch ( cd->src_fmt ) {
+	    case ENCODING_UTF16:
+		/* Scan for a byte order marker */
+		/* In byte order, FF FE marks little-endian and FE FF big-endian */
+		{
+			Uint8 *p = (Uint8 *)src;
+			size_t n = srclen / 2;
+			while ( n ) {
+				if ( p[0] == 0xFF && p[1] == 0xFE ) {
+					cd->src_fmt = ENCODING_UTF16LE;
+					break;
+				} else if ( p[0] == 0xFE && p[1] == 0xFF ) {
+					cd->src_fmt = ENCODING_UTF16BE;
+					break;
+				}
+				p += 2;
+				--n;
+			}
+			if ( n == 0 ) {
+				/* We can't tell, default to host order */
+				cd->src_fmt = ENCODING_UTF16NATIVE;
+			}
+		}
+		break;
+	    case ENCODING_UTF32:
+		/* Scan for a byte order marker */
+		/* FF FE 00 00 marks little-endian and 00 00 FE FF big-endian */
+		{
+			Uint8 *p = (Uint8 *)src;
+			size_t n = srclen / 4;
+			while ( n ) {
+				if ( p[0] == 0xFF && p[1] == 0xFE &&
+				     p[2] == 0x00 && p[3] == 0x00 ) {
+					cd->src_fmt = ENCODING_UTF32LE;
+					break;
+				} else if ( p[0] == 0x00 && p[1] == 0x00 &&
+				            p[2] == 0xFE && p[3] == 0xFF ) {
+					cd->src_fmt = ENCODING_UTF32BE;
+					break;
+				}
+				p += 4;
+				--n;
+			}
+			if ( n == 0 ) {
+				/* We can't tell, default to host order */
+				cd->src_fmt = ENCODING_UTF32NATIVE;
+			}
+		}
+		break;
+	}
+
+	switch ( cd->dst_fmt ) {
+	    case ENCODING_UTF16:
+		/* Default to host order, need to add byte order marker */
+		if ( dstlen < 2 ) {
+			return SDL_ICONV_E2BIG;
+		}
+		*(Uint16 *)dst = UNICODE_BOM;
+		dst += 2;
+		dstlen -= 2;
+		cd->dst_fmt = ENCODING_UTF16NATIVE;
+		/* Commit the BOM now: the main loop only updates the caller's
+		   pointers after a character has been written */
+		*outbuf = dst;
+		*outbytesleft = dstlen;
+		break;
+	    case ENCODING_UTF32:
+		/* Default to host order, need to add byte order marker */
+		if ( dstlen < 4 ) {
+			return SDL_ICONV_E2BIG;
+		}
+		*(Uint32 *)dst = UNICODE_BOM;
+		dst += 4;
+		dstlen -= 4;
+		cd->dst_fmt = ENCODING_UTF32NATIVE;
+		/* Commit the BOM right away, as for UTF-16 above */
+		*outbuf = dst;
+		*outbytesleft = dstlen;
+		break;
+	}
+
+	total = 0;
+	while ( srclen > 0 ) {
+		/* Decode a character */
+		switch ( cd->src_fmt ) {
+		    case ENCODING_ASCII:
+			{
+				Uint8 *p = (Uint8 *)src;
+				ch = (Uint32)(p[0] & 0x7F);
+				++src;
+				--srclen;
+			}
+			break;
+		    case ENCODING_LATIN1:
+			{
+				Uint8 *p = (Uint8 *)src;
+				ch = (Uint32)p[0];
+				++src;
+				--srclen;
+			}
+			break;
+		    case ENCODING_UTF8: /* RFC 3629 */
+			{
+				Uint8 *p = (Uint8 *)src;
+				size_t left = 0;
+				SDL_bool overlong = SDL_FALSE;
+				/* A lead byte with zero payload bits is only overlong if
+				   the next byte's high payload bits are zero as well */
+				if ( p[0] >= 0xFC ) {
+					if ( (p[0] & 0xFE) != 0xFC ) {
+						/* Skip illegal sequences
+						return SDL_ICONV_EILSEQ;
+						*/
+						ch = UNKNOWN_UNICODE;
+					} else {
+						if ( p[0] == 0xFC && srclen > 1 && (p[1] & 0xFC) == 0x80 ) {
+							overlong = SDL_TRUE;
+						}
+						ch = (Uint32)(p[0] & 0x01);
+						left = 5;
+					}
+				} else if ( p[0] >= 0xF8 ) {
+					if ( (p[0] & 0xFC) != 0xF8 ) {
+						/* Skip illegal sequences
+						return SDL_ICONV_EILSEQ;
+						*/
+						ch = UNKNOWN_UNICODE;
+					} else {
+						if ( p[0] == 0xF8 && srclen > 1 && (p[1] & 0xF8) == 0x80 ) {
+							overlong = SDL_TRUE;
+						}
+						ch = (Uint32)(p[0] & 0x03);
+						left = 4;
+					}
+				} else if ( p[0] >= 0xF0 ) {
+					if ( (p[0] & 0xF8) != 0xF0 ) {
+						/* Skip illegal sequences
+						return SDL_ICONV_EILSEQ;
+						*/
+						ch = UNKNOWN_UNICODE;
+					} else {
+						if ( p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80 ) {
+							overlong = SDL_TRUE;
+						}
+						ch = (Uint32)(p[0] & 0x07);
+						left = 3;
+					}
+				} else if ( p[0] >= 0xE0 ) {
+					if ( (p[0] & 0xF0) != 0xE0 ) {
+						/* Skip illegal sequences
+						return SDL_ICONV_EILSEQ;
+						*/
+						ch = UNKNOWN_UNICODE;
+					} else {
+						if ( p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80 ) {
+							overlong = SDL_TRUE;
+						}
+						ch = (Uint32)(p[0] & 0x0F);
+						left = 2;
+					}
+				} else if ( p[0] >= 0xC0 ) {
+					if ( (p[0] & 0xE0) != 0xC0 ) {
+						/* Skip illegal sequences
+						return SDL_ICONV_EILSEQ;
+						*/
+						ch = UNKNOWN_UNICODE;
+					} else {
+						if ( (p[0] & 0xDE) == 0xC0 ) {
+							overlong = SDL_TRUE;
+						}
+						ch = (Uint32)(p[0] & 0x1F);
+						left = 1;
+					}
+				} else {
+					if ( (p[0] & 0x80) != 0x00 ) {
+						/* Skip illegal sequences
+						return SDL_ICONV_EILSEQ;
+						*/
+						ch = UNKNOWN_UNICODE;
+					} else {
+						ch = (Uint32)p[0];
+					}
+				}
+				++src;
+				--srclen;
+				if ( srclen < left ) {
+					return SDL_ICONV_EINVAL;
+				}
+				while ( left-- ) {
+					++p;
+					if ( (p[0] & 0xC0) != 0x80 ) {
+						/* Skip illegal sequences
+						return SDL_ICONV_EILSEQ;
+						*/
+						ch = UNKNOWN_UNICODE;
+						break;
+					}
+					ch <<= 6;
+					ch |= (p[0] & 0x3F);
+					++src;
+					--srclen;
+				}
+				if ( overlong ) {
+					/* Potential security risk
+					return SDL_ICONV_EILSEQ;
+					*/
+					ch = UNKNOWN_UNICODE;
+				}
+				if ( (ch >= 0xD800 && ch <= 0xDFFF) ||
+				     (ch == 0xFFFE || ch == 0xFFFF) ) {
+					/* Skip illegal sequences
+					return SDL_ICONV_EILSEQ;
+					*/
+					ch = UNKNOWN_UNICODE;
+				}
+			}
+			break;
+		    case ENCODING_UTF16BE: /* RFC 2781 */
+			{
+				Uint8 *p = (Uint8 *)src;
+				Uint16 W1, W2;
+				if ( srclen < 2 ) {
+					return SDL_ICONV_EINVAL;
+				}
+				W1 = ((Uint32)p[0] << 8) |
+				      (Uint32)p[1];
+				src += 2;
+				srclen -= 2;
+				if ( W1 < 0xD800 || W1 > 0xDFFF ) {
+					ch = (Uint32)W1;
+					break;
+				}
+				if ( W1 > 0xDBFF ) {
+					/* Skip illegal sequences
+					return SDL_ICONV_EILSEQ;
+					*/
+					ch = UNKNOWN_UNICODE;
+					break;
+				}
+				if ( srclen < 2 ) {
+					return SDL_ICONV_EINVAL;
+				}
+				p = (Uint8 *)src;
+				W2 = ((Uint32)p[0] << 8) |
+				      (Uint32)p[1];
+				src += 2;
+				srclen -= 2;
+				if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
+					/* Skip illegal sequences
+					return SDL_ICONV_EILSEQ;
+					*/
+					ch = UNKNOWN_UNICODE;
+					break;
+				}
+				ch = (((Uint32)(W1 & 0x3FF) << 10) |
+				      (Uint32)(W2 & 0x3FF)) + 0x10000;
+			}
+			break;
+		    case ENCODING_UTF16LE: /* RFC 2781 */
+			{
+				Uint8 *p = (Uint8 *)src;
+				Uint16 W1, W2;
+				if ( srclen < 2 ) {
+					return SDL_ICONV_EINVAL;
+				}
+				W1 = ((Uint32)p[1] << 8) |
+				      (Uint32)p[0];
+				src += 2;
+				srclen -= 2;
+				if ( W1 < 0xD800 || W1 > 0xDFFF ) {
+					ch = (Uint32)W1;
+					break;
+				}
+				if ( W1 > 0xDBFF ) {
+					/* Skip illegal sequences
+					return SDL_ICONV_EILSEQ;
+					*/
+					ch = UNKNOWN_UNICODE;
+					break;
+				}
+				if ( srclen < 2 ) {
+					return SDL_ICONV_EINVAL;
+				}
+				p = (Uint8 *)src;
+				W2 = ((Uint32)p[1] << 8) |
+				      (Uint32)p[0];
+				src += 2;
+				srclen -= 2;
+				if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
+					/* Skip illegal sequences
+					return SDL_ICONV_EILSEQ;
+					*/
+					ch = UNKNOWN_UNICODE;
+					break;
+				}
+				ch = (((Uint32)(W1 & 0x3FF) << 10) |
+				      (Uint32)(W2 & 0x3FF)) + 0x10000;
+			}
+			break;
+		    case ENCODING_UTF32BE:
+			{
+				Uint8 *p = (Uint8 *)src;
+				if ( srclen < 4 ) {
+					return SDL_ICONV_EINVAL;
+				}
+				ch = ((Uint32)p[0] << 24) |
+				     ((Uint32)p[1] << 16) |
+				     ((Uint32)p[2] << 8) |
+				      (Uint32)p[3];
+				src += 4;
+				srclen -= 4;
+			}
+			break;
+		    case ENCODING_UTF32LE:
+			{
+				Uint8 *p = (Uint8 *)src;
+				if ( srclen < 4 ) {
+					return SDL_ICONV_EINVAL;
+				}
+				ch = ((Uint32)p[3] << 24) |
+				     ((Uint32)p[2] << 16) |
+				     ((Uint32)p[1] << 8) |
+				      (Uint32)p[0];
+				src += 4;
+				srclen -= 4;
+			}
+			break;
+		    case ENCODING_UCS2:
+			{
+				Uint16 *p = (Uint16 *)src;
+				if ( srclen < 2 ) {
+					return SDL_ICONV_EINVAL;
+				}
+				ch = *p;
+				src += 2;
+				srclen -= 2;
+			}
+			break;
+		    case ENCODING_UCS4:
+			{
+				Uint32 *p = (Uint32 *)src;
+				if ( srclen < 4 ) {
+					return SDL_ICONV_EINVAL;
+				}
+				ch = *p;
+				src += 4;
+				srclen -= 4;
+			}
+			break;
+		}
+
+		/* Encode a character */
+		switch ( cd->dst_fmt ) {
+		    case ENCODING_ASCII:
+			{
+				Uint8 *p = (Uint8 *)dst;
+				if ( dstlen < 1 ) {
+					return SDL_ICONV_E2BIG;
+				}
+				if ( ch > 0x7F ) {
+					*p = UNKNOWN_ASCII;
+				} else {
+					*p = (Uint8)ch;
+				}
+				++dst;
+				--dstlen;
+			}
+			break;
+		    case ENCODING_LATIN1:
+			{
+				Uint8 *p = (Uint8 *)dst;
+				if ( dstlen < 1 ) {
+					return SDL_ICONV_E2BIG;
+				}
+				if ( ch > 0xFF ) {
+					*p = UNKNOWN_ASCII;
+				} else {
+					*p = (Uint8)ch;
+				}
+				++dst;
+				--dstlen;
+			}
+			break;
+		    case ENCODING_UTF8: /* RFC 3629 */
+			{
+				Uint8 *p = (Uint8 *)dst;
+				if ( ch > 0x7FFFFFFF ) {
+					ch = UNKNOWN_UNICODE;
+				}
+				if ( ch <= 0x7F ) {
+					if ( dstlen < 1 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					*p = (Uint8)ch;
+					++dst;
+					--dstlen;
+				} else if ( ch <= 0x7FF ) {
+					if ( dstlen < 2 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F);
+					p[1] = 0x80 | (Uint8)(ch & 0x3F);
+					dst += 2;
+					dstlen -= 2;
+				} else if ( ch <= 0xFFFF ) {
+					if ( dstlen < 3 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F);
+					p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
+					p[2] = 0x80 | (Uint8)(ch & 0x3F);
+					dst += 3;
+					dstlen -= 3;
+				} else if ( ch <= 0x1FFFFF ) {
+					if ( dstlen < 4 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07);
+					p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
+					p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
+					p[3] = 0x80 | (Uint8)(ch & 0x3F);
+					dst += 4;
+					dstlen -= 4;
+				} else if ( ch <= 0x3FFFFFF ) {
+					if ( dstlen < 5 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					p[0] = 0xF8 | (Uint8)((ch >> 24) & 0x03);
+					p[1] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
+					p[2] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
+					p[3] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
+					p[4] = 0x80 | (Uint8)(ch & 0x3F);
+					dst += 5;
+					dstlen -= 5;
+				} else {
+					if ( dstlen < 6 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					p[0] = 0xFC | (Uint8)((ch >> 30) & 0x01);
+					p[1] = 0x80 | (Uint8)((ch >> 24) & 0x3F);
+					p[2] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
+					p[3] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
+					p[4] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
+					p[5] = 0x80 | (Uint8)(ch & 0x3F);
+					dst += 6;
+					dstlen -= 6;
+				}
+			}
+			break;
+		    case ENCODING_UTF16BE: /* RFC 2781 */
+			{
+				Uint8 *p = (Uint8 *)dst;
+				if ( ch > 0x10FFFF ) {
+					ch = UNKNOWN_UNICODE;
+				}
+				if ( ch < 0x10000 ) {
+					if ( dstlen < 2 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					p[0] = (Uint8)(ch >> 8);
+					p[1] = (Uint8)ch;
+					dst += 2;
+					dstlen -= 2;
+				} else {
+					Uint16 W1, W2;
+					if ( dstlen < 4 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					ch = ch - 0x10000;
+					W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
+					W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
+					p[0] = (Uint8)(W1 >> 8);
+					p[1] = (Uint8)W1;
+					p[2] = (Uint8)(W2 >> 8);
+					p[3] = (Uint8)W2;
+					dst += 4;
+					dstlen -= 4;
+				}
+			}
+			break;
+		    case ENCODING_UTF16LE: /* RFC 2781 */
+			{
+				Uint8 *p = (Uint8 *)dst;
+				if ( ch > 0x10FFFF ) {
+					ch = UNKNOWN_UNICODE;
+				}
+				if ( ch < 0x10000 ) {
+					if ( dstlen < 2 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					p[1] = (Uint8)(ch >> 8);
+					p[0] = (Uint8)ch;
+					dst += 2;
+					dstlen -= 2;
+				} else {
+					Uint16 W1, W2;
+					if ( dstlen < 4 ) {
+						return SDL_ICONV_E2BIG;
+					}
+					ch = ch - 0x10000;
+					W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
+					W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
+					p[1] = (Uint8)(W1 >> 8);
+					p[0] = (Uint8)W1;
+					p[3] = (Uint8)(W2 >> 8);
+					p[2] = (Uint8)W2;
+					dst += 4;
+					dstlen -= 4;
+				}
+			}
+			break;
+		    case ENCODING_UTF32BE:
+			{
+				Uint8 *p = (Uint8 *)dst;
+				if ( ch > 0x7FFFFFFF ) {
+					ch = UNKNOWN_UNICODE;
+				}
+				if ( dstlen < 4 ) {
+					return SDL_ICONV_E2BIG;
+				}
+				p[0] = (Uint8)(ch >> 24);
+				p[1] = (Uint8)(ch >> 16);
+				p[2] = (Uint8)(ch >> 8);
+				p[3] = (Uint8)ch;
+				dst += 4;
+				dstlen -= 4;
+			}
+			break;
+		    case ENCODING_UTF32LE:
+			{
+				Uint8 *p = (Uint8 *)dst;
+				if ( ch > 0x7FFFFFFF ) {
+					ch = UNKNOWN_UNICODE;
+				}
+				if ( dstlen < 4 ) {
+					return SDL_ICONV_E2BIG;
+				}
+				p[3] = (Uint8)(ch >> 24);
+				p[2] = (Uint8)(ch >> 16);
+				p[1] = (Uint8)(ch >> 8);
+				p[0] = (Uint8)ch;
+				dst += 4;
+				dstlen -= 4;
+			}
+			break;
+		    case ENCODING_UCS2:
+			{
+				Uint16 *p = (Uint16 *)dst;
+				if ( ch > 0xFFFF ) {
+					ch = UNKNOWN_UNICODE;
+				}
+				if ( dstlen < 2 ) {
+					return SDL_ICONV_E2BIG;
+				}
+				*p = (Uint16)ch;
+				dst += 2;
+				dstlen -= 2;
+			}
+			break;
+		    case ENCODING_UCS4:
+			{
+				Uint32 *p = (Uint32 *)dst;
+				if ( ch > 0x7FFFFFFF ) {
+					ch = UNKNOWN_UNICODE;
+				}
+				if ( dstlen < 4 ) {
+					return SDL_ICONV_E2BIG;
+				}
+				*p = ch;
+				dst += 4;
+				dstlen -= 4;
+			}
+			break;
+		}
+
+		/* Update state */
+		*inbuf = src;
+		*inbytesleft = srclen;
+		*outbuf = dst;
+		*outbytesleft = dstlen;
+		++total;
+	}
+	return total;
+}
+
+/* Free a descriptor from SDL_iconv_open(); NULL and (SDL_iconv_t)-1 are ignored */
+int SDL_iconv_close(SDL_iconv_t cd)
+{
+	if ( cd && cd != (SDL_iconv_t)-1 ) {
+		SDL_free(cd);
+	}
+	return 0;
+}
+
+#endif /* !HAVE_ICONV */
+
+/* Convert a whole buffer in one call, doubling the output buffer whenever
+   SDL_iconv() reports SDL_ICONV_E2BIG.  Returns a buffer the caller must
+   SDL_free(), or NULL if the descriptor can't be opened or memory runs out. */
+char *SDL_iconv_string(const char *tocode, const char *fromcode, char *inbuf, size_t inbytesleft)
+{
+	SDL_iconv_t cd;
+	char *string;
+	size_t stringsize;
+	char *outbuf;
+	size_t outbytesleft;
+	size_t retCode = 0;
+
+	cd = SDL_iconv_open(tocode, fromcode);
+	if ( cd == (SDL_iconv_t)-1 ) {
+		return NULL;
+	}
+
+	stringsize = inbytesleft > 4 ? inbytesleft : 4;
+	string = SDL_malloc(stringsize);
+	if ( !string ) {
+		SDL_iconv_close(cd);
+		return NULL;
+	}
+	outbuf = string;
+	outbytesleft = stringsize;
+	SDL_memset(outbuf, 0, 4);
+
+	while ( inbytesleft > 0 ) {
+		retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+		switch (retCode) {
+		    case SDL_ICONV_E2BIG:
+			{
+				/* Remember the write offset; the buffer may move */
+				size_t used = (size_t)(outbuf - string);
+				char *bigger = SDL_realloc(string, stringsize * 2);
+				if ( !bigger ) {
+					/* A failed realloc leaves the old buffer allocated */
+					SDL_free(string);
+					SDL_iconv_close(cd);
+					return NULL;
+				}
+				string = bigger;
+				stringsize *= 2;
+				outbuf = string + used;
+				outbytesleft = stringsize - used;
+				SDL_memset(outbuf, 0, 4);
+			}
+			break;
+		    case SDL_ICONV_EILSEQ:
+			/* Try skipping some input data - not perfect, but... */
+			++inbuf;
+			--inbytesleft;
+			break;
+		    case SDL_ICONV_EINVAL:
+		    case SDL_ICONV_ERROR:
+			/* We can't continue... */
+			inbytesleft = 0;
+			break;
+		}
+	}
+	SDL_iconv_close(cd);
+
+	return string;
+}
diff --git a/src/stdlib/SDL_string.c b/src/stdlib/SDL_string.c
index 9c533fe02..3c53b6dfd 100644
--- a/src/stdlib/SDL_string.c
+++ b/src/stdlib/SDL_string.c
@@ -661,12 +661,12 @@ int SDL_strncmp(const char *str1, const char *str2, size_t maxlen)
 }
 #endif
 
-#ifndef HAVE_STRCASECMP
+#if !defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)
 int SDL_strcasecmp(const char *str1, const char *str2)
 {
     char a = 0;
     char b = 0;
-    while (*str1 && *str2) {
+    while ( *str1 && *str2 ) {
         a = SDL_tolower(*str1);
         b = SDL_tolower(*str2);
         if ( a != b )
@@ -678,6 +678,26 @@ int SDL_strcasecmp(const char *str1, const char *str2)
 }
 #endif
 
+#ifndef HAVE_STRNCASECMP
+/* Case-insensitive comparison of at most maxlen characters.  The terminator
+   takes part in the comparison, so a string never equals a longer one. */
+int SDL_strncasecmp(const char *str1, const char *str2, size_t maxlen)
+{
+    char a = 0;
+    char b = 0;
+    while ( maxlen ) {
+        a = SDL_tolower(*str1);
+        b = SDL_tolower(*str2);
+        if ( a != b || !a )
+            break;
+        ++str1;
+        ++str2;
+        --maxlen;
+    }
+    return (int)((unsigned char)a - (unsigned char)b);
+}
+#endif
+
 #ifndef HAVE_SSCANF
 int SDL_sscanf(const char *text, const char *fmt, ...)
{ diff --git a/test/Makefile.in b/test/Makefile.in index c88e461b9..52fd803e5 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -7,7 +7,7 @@ EXE = @EXE@ CFLAGS = @CFLAGS@ LIBS = @LIBS@ -TARGETS = checkkeys$(EXE) graywin$(EXE) loopwave$(EXE) testalpha$(EXE) testbitmap$(EXE) testblitspeed$(EXE) testcdrom$(EXE) testdyngl$(EXE) testerror$(EXE) testfile$(EXE) testgamma$(EXE) testgl$(EXE) testhread$(EXE) testjoystick$(EXE) testkeys$(EXE) testlock$(EXE) testoverlay2$(EXE) testoverlay$(EXE) testpalette$(EXE) testplatform$(EXE) testsem$(EXE) testsprite$(EXE) testtimer$(EXE) testver$(EXE) testvidinfo$(EXE) testwin$(EXE) testwm$(EXE) threadwin$(EXE) torturethread$(EXE) +TARGETS = checkkeys$(EXE) graywin$(EXE) loopwave$(EXE) testalpha$(EXE) testbitmap$(EXE) testblitspeed$(EXE) testcdrom$(EXE) testdyngl$(EXE) testerror$(EXE) testfile$(EXE) testgamma$(EXE) testgl$(EXE) testhread$(EXE) testiconv$(EXE) testjoystick$(EXE) testkeys$(EXE) testlock$(EXE) testoverlay2$(EXE) testoverlay$(EXE) testpalette$(EXE) testplatform$(EXE) testsem$(EXE) testsprite$(EXE) testtimer$(EXE) testver$(EXE) testvidinfo$(EXE) testwin$(EXE) testwm$(EXE) threadwin$(EXE) torturethread$(EXE) all: $(TARGETS) @@ -50,6 +50,9 @@ testgl$(EXE): $(srcdir)/testgl.c testhread$(EXE): $(srcdir)/testhread.c $(CC) -o $@ $? $(CFLAGS) $(LIBS) +testiconv$(EXE): $(srcdir)/testiconv.c + $(CC) -o $@ $? $(CFLAGS) $(LIBS) + testjoystick$(EXE): $(srcdir)/testjoystick.c $(CC) -o $@ $? 
$(CFLAGS) $(LIBS) diff --git a/test/testiconv.c b/test/testiconv.c new file mode 100644 index 000000000..62bda5c55 --- /dev/null +++ b/test/testiconv.c @@ -0,0 +1,85 @@ + +#include + +#include "SDL.h" + +static SDL_bool testutf16(char *data) +{ + Uint32 *p = (Uint32 *)data; + while(*p) { + if ( *p > 0x10FFFF ) { + return SDL_FALSE; + } + ++p; + } + return SDL_TRUE; +} + +static size_t widelen(char *data) +{ + size_t len = 0; + Uint32 *p = (Uint32 *)data; + while(*p++) { + ++len; + } + return len; +} + +int main(int argc, char *argv[]) +{ + const char * formats[] = { + "UTF8", + "UTF-8", + "UTF16BE", + "UTF-16BE", + "UTF16LE", + "UTF-16LE", + "UTF32BE", + "UTF-32BE", + "UTF32LE", + "UTF-32LE", + "UCS4", + "UCS-4", + }; + char buffer[BUFSIZ]; + char *ucs4; + char *test[2]; + int i, j, index = 0; + FILE *file; + int errors = 0; + + if ( !argv[1] ) { + argv[1] = "utf8.txt"; + } + file = fopen(argv[1], "rb"); + if ( !file ) { + fprintf(stderr, "Unable to open %s\n", argv[1]); + return (1); + } + + while ( fgets(buffer, sizeof(buffer), file) ) { + /* Convert to UCS-4 */ + ucs4 = SDL_iconv_string("UCS-4", "UTF-8", buffer, SDL_strlen(buffer)+1); + size_t len = (widelen(ucs4)+1)*4; + for ( i = 0; i < SDL_arraysize(formats); ++i ) { + if ( (SDL_strncasecmp(formats[i], "UTF16", 5) == 0 || + SDL_strncasecmp(formats[i], "UTF-16", 6) == 0) && + !testutf16(ucs4) ) { + continue; + } + test[0] = SDL_iconv_string(formats[i], "UCS-4", ucs4, len); + test[1] = SDL_iconv_string("UCS-4", formats[i], test[0], len); + if ( SDL_memcmp(test[1], ucs4, len) != 0 ) { + fprintf(stderr, "FAIL: %s\n", formats[i]); + ++errors; + } + SDL_free(test[0]); + SDL_free(test[1]); + } + test[0] = SDL_iconv_string("UTF-8", "UCS-4", ucs4, len); + SDL_free(ucs4); + fputs(test[0], stdout); + SDL_free(test[0]); + } + return (errors ? 
errors + 1 : 0); +} diff --git a/test/utf8.txt b/test/utf8.txt new file mode 100644 index 0000000000000000000000000000000000000000..abd16f7253bf7254ad1ae04cb5bee8d8c47e407a GIT binary patch literal 20334 zcmdU1X>%Ocb-l=P@_l?QkaQ+KqMb=kWH&#~HSJo~$&h?IO#*?-m zc%Ubgju-S~81=~ARry~yhNh5G^057n(4B1 z-Jl)CU9Yt)Q7lo~@#2A>c;bht2ToHz+%uR?;yu;(!lswhMM+X>Kw223()HXhl{Auu zCoaH4V!C)(1yz@8sk9={!DoAbZN`2Bgt*~QdTyNh&3@ow^nTimx?SUnX>wt*17rPAz z)GE5+b=(6;DHb=Cgmm-*H)9tx`CZcBhe_(WATM%qC1CJyU)%=BH>6_>9koDo+V zLQs$lfz)-uJLDAvP==4s@nApfCw(`-P`!Q%VP8G$d9hE)gDPo5PL>ZR?SRy7gyGtL zoTMwMPwDl+c&u6;Glm2YL9lou@LaqSg&uTj?4lP(5Cu4-)I2{fgff(VLK{#CQ{{D9 zq|vJcu)PF24`6-G=m6dVA%rbA9?}L{K3PgiHa2NV;wCcif`C6WiwuSWR^fTA%#?Gp zqik-3KEK1mEzjviFjCkwg^)nkK}$j?9(L4hLW|&qrl@wFVP*6`)WpXvfwh*ek+1J39Sl`y({PyUv>iuXYRwo~Fm!U$wRSqC&$svCdeo15fXJl* zDAN*bHU{fXI|Ba=#F8QyxBDb2@b=tbJhqjLR|9PLK+X9_%WELc>HrMiEx+An zd`GTfTxa@1{9y!DbA#~0HbP7kBCx!TkqH*o@tV-ypyR+x5smqBvafgZ2u0L{Bu0Fm~gU2Wn0TThEIHF!8 zzGf+Cnr4)EG$9vi)68YFWHWY?4r34!B#VxQCTZ)OSr~Q$L^vh}r|BX}2=X}f-1K5< z`5{<}$s9@rByf31d8rq$fI*bvyH4BlTIY;;w@jjedU9Y7iedni z+NcJPIc$18vh~)RzPp+g1|5eS9F9dx>?kmdP#0`5x@!@kpZ= zvO3BFgbO!-LxpgI1L)2qDg;pa8|a%JQ1cS1vlvFr{0fW!#_b9mtBur7r)kn4RG~}s zX>g_mU*e~Ib+<$-$WoPf4i5x<9TX(w@kYorUT>iuXt8K(3_&}{D#fha9c^Nrk>zr; zF;T@>4W=6K@x3ANow5%u(~3jH0u7cL&grU|38{rZiq0b}o3)-+qlYmhm*vFQ$T{(= zGhZAzYa&mw%Cbevv?*#tUSS3FcF7H_+Z z_tB%OfFlLm5ZRaj+#VzF_A_%LZi;M+b;O9E6T*>$Ja7J5ctctfCfR-EtP@4TGFv-dnpR^Zv4`)+pq6mU(aO zmhoP`Srt$PSf4g;%=pTHcW=&#IBDK%1S}(d^5(pd6Xw04h-Ju60dh_<$ITl;)T^YK zPZM(v(9;_9`1{ljbn^u2hnXuo<4@oJx3e(s4Vt$JvO5r3Q5pRg-$@VJ$0r3;nqfH4BQZ$HA@D34`z($LRcW=H>)m=)6uy7)-taQ^BX8G(@Zw}*M27&(yPV>4zK^MI6| zJp=mG5|Wf4|$s(|46qmHsjG*=iRngNcV_d9%P^7C!>6 ztZ>=QUZWXPJ=SwRR%D|=Shvqr{keF45gDivQ+rM+Lsk#BS1(JgYV82~GnJht`)TC@ zuh9lx)p&C@{K7}pr)k5yuXa?GuuNfwUqqRWd*M};oB=d=2r--Y#G8vkgNFoq;of-j zP-yVBSZ_|*ijm*Z&3R;K@U}&^j>D@?;n~>BCCtI?1_f6wuz+@PvnXY!B_T`nn-v;+ zQKnGCi>D8uwWBZpil^W4m0$JFcYXEOeC^kL{WrYZ*wRVR0nJ4SU^is?An&s2FC}l@Y|Wt$9T`x<&AZ-A)2LK 
zSoZpguFTn$2TaUY51X{(RvYX3Gp?LIUEwp!a>#+YHzZ*03>rubnn_9oO<>T@<^0)GL z^7rx&@{jUQ^3U=w@~`r5az{QPcmLn@fqH}b*q&6!<_vYTsgCy6YRsw%Dji*%rH>}{(Rx$s<8}G;0(3#0oP|CRt!%Z6ZA)$s>;Cstl`CBzI~CY-KIU3H zeKZsnDcZTEO)c2G!zLd#6m8j3o}p_`LOw0ewp0s(XU)$|kAZIJTO{smX)_P|WTrYI z3OMK^-wD=@U@j)0uHx1rmK0L7Q0b-oCODBBD+0bUz3ym8M980r?iCSV zo3U|XAQ81D0#rrF*Jo|U7);F6D{IDV!I?dU+RL*AiJ5w3%_tt%Nik)`a_ekBOw}uE zh6}I6Q$TDb%tplIy0T_gR*MaV8g~+AL&D1{Yi6aipHS0{glWv&9T###O~V`?MyC(f z4%x9`dNh1AbWTLFq{@Jcj}eTq3|Ww)jj0-L;p<$c$;f6m78)+fF%mBnohFv{QQq3Y zUMD7ue6h6BoY3OfpNr=g$hBLyj}ScQ?dqN{-@OTorHRe72}?`WpNpqiHzKZe^Zitl z6RXlQ`P|~^yAJy)n*Z2LW++5wqxN<7dLFt>k@oV&P}>2RYTM;Fg^Q;T@?#!sd)!=l zW76lxN5B7X^yv$nceiz4=`)MLLmC8q{MXKa-+1FQC&FI5{1CyOK%1L39Tq|2{*s#g z#Nz22x4(iy+xT6?DYUs2eeMip~*h@CpAcUnBI^x(SXYT-MHE98BRk z4((Le#$`6N2keSR?n3d}79MMuumwuOUL!rzffZ5P4od<3vQm zGmWv@9cl>30HKdalAfNE(K8td4tQX^tdA4rHLB{1+k#3ps(CMnKv%ZCQDm7 zn|hK0^7NZLGlbP%YzXGp$kDcc5ZM^=6b@Dz*q+JuQ8a97=eC(pQ21H5Y(5{T%Pu}0 zyG9O=)bOYfPgrCnqHUvRqbZ^U1iSR;?m0Bx=@}2m1{6Vn+Qbj@D+7+Ss~5?<&jU;u zRs02co`)7z4_9$^fYU5!0ZsPk5H+x9_~!(y#HzxM&;%OM(2aIoZGJU0aVh?6bbJY8aFncccZGiC43nemQ<500s#YGe<=* zr^QJqYC;W)1)6t=lO33(=GWrm^Cq+JRUFL2+}+OQb7Chnl9O@Io5j=jMO^rRXo^^y zN!v8gLr>IkQQ@@NcbzjZNodJlikL!xNlp{2wRB{ucD{yQYI9l(c4w3c-?A6|mxp>v z<@UIOmsPRynbPSus|+vtg|twiQ@^Sd=Cmy7T$Wkave8v2)JGVdfF3I#suK) zqj?cG3dFYUGRi@Lh&ju=keemQ?W|=By-Nlakema}i+Q1h*(@*(EHHIWH4o_gCUV>x z)DDXC$J|4|+wT@`S|o+z-k$z$@#c6%PlxJs8ji-u(80+S9!;@_IZ-k7`3({lYDANV zaR3TO#u%|VkqUD{M@}AzEQ>7C$~jBjpX2NyaG^}ikFsbE%g#huK$IWsYRZqT>X{j4 zkZCVR*5LAihQp$kv#~4#969BnHc^?52eNV&vgAztT)aMDFHBIT9g#QK?V}Q&(x%qf|3aJy!XH#mf)+Rd+TA5jEW-Ht! zXA5Z3eHjygx2Ja_$|&V~gGhI_h?hPs+neOMrq&w%B$Kv(DrQj#c#x7|>;}oXpakPi^i83Q4o{qac6Z%Z}+QES~ zI2c!1bT$BAyYozCN6U$ZhnP0)8xQVW(b|TH=-bmoeCOFpSvW`IjX~7-wR1&-01@>> z2x)xoq%>;y+PR{Y2obepAmZ~UrBQ>|&cjxYja-iJJ1LFWw1pgZE!Xf{69wa!?h1Q=rx^sWU zE6*n1Tg+TJ1!iuWz`Vbr3})sr@WC-U)fLC+eD-E#Iy0X!m<63VW==W9D=MvkIN3 zcEK?K+R;XK`q2I@mD3bGXOX?K;v2HN%Iu{ALzqNFyL=9%@8$v a&2b9W0wR0mw+p!$IoY3(SqK`l>;D0Y6_gYJ literal 0 HcmV?d00001