diff options
Diffstat (limited to 'jni/iconv/extras')
-rw-r--r-- | jni/iconv/extras/ChangeLog | 64 | ||||
-rw-r--r-- | jni/iconv/extras/iconv_string.c | 154 | ||||
-rw-r--r-- | jni/iconv/extras/iconv_string.h | 47 |
3 files changed, 265 insertions, 0 deletions
diff --git a/jni/iconv/extras/ChangeLog b/jni/iconv/extras/ChangeLog new file mode 100644 index 0000000..e74835b --- /dev/null +++ b/jni/iconv/extras/ChangeLog @@ -0,0 +1,64 @@ +2003-05-10 Bruno Haible <bruno@clisp.org> + + * iconv_string.c (iconv_string): Don't return -1 just because the + string is longer than 4 KB. + +2002-02-13 Bruno Haible <bruno@clisp.org> + + * iconv.m4: Remove file. Obsoleted by m4/iconv.m4. + +2000-06-16 Bruno Haible <haible@clisp.cons.org> + + * iconv.m4: Change prefix to AM. + +2001-05-23 Bruno Haible <haible@clisp.cons.org> + + * iconv.m4 (jm_ICONV): Accept --with-libiconv-prefix option. + +2001-03-23 Bruno Haible <haible@clisp.cons.org> + + * iconv.m4 (jm_ICONV): Tweak printing of prototype. + +2001-03-20 Bruno Haible <haible@clisp.cons.org> + + * iconv.m4 (jm_ICONV): Recommend GNU libiconv. + +2001-01-03 Bruno Haible <haible@clisp.cons.org> + + * iconv.m4 (jm_ICONV): Also check whether the iconv declaration + has const. + +2000-02-02 Bruno Haible <haible@clisp.cons.org> + + * iconv.m4: New file. + +2001-01-29 Bruno Haible <haible@clisp.cons.org> + + * locale_charset.c: Remove file. Obsoleted by libcharset. + +2000-10-22 Bruno Haible <haible@clisp.cons.org> + + * locale_charset.c (get_locale_charset): Accept french and spanish + names in both ISO-8859-1 and UTF-8. + +2000-08-24 Jim Blackson <blackson@ontrack-japan.com> + + * iconv_string.c (iconv_string): Fix return value for autodetect. + +2000-01-24 Bruno Haible <haible@clisp.cons.org> + + * iconv_string.c (iconv_string): Stop recognizing JOHAB. + Fix typo for EUC-JP. + +1999-12-18 Bruno Haible <haible@clisp.cons.org> + + * locale_charset.c (get_locale_charset): Recognize more language codes: + "af" (afrikaans), "ca" (catalan), "eu" (basque), "fo" (faeroese), + "ga" (irish), "gd" (scottish), "gl" (galician), "sq" (albanian), + "eo" (esperanto), "mt" (maltese), "be" (byelorussian), + "et" (estonian), "lt" (lithuanian), "lv" (latvian), "uk" (ukrainian). 
+ Recognize more aliases: "english", "slovenian", "macedonian", + "serbian", "arabic". + Change default: KOI8-R for "ru" (russian) instead of ISO-8859-5, + ISO-8859-5 for "sr" instead of ISO-8859-2. + diff --git a/jni/iconv/extras/iconv_string.c b/jni/iconv/extras/iconv_string.c new file mode 100644 index 0000000..f2c4b52 --- /dev/null +++ b/jni/iconv/extras/iconv_string.c @@ -0,0 +1,154 @@ +/* Copyright (C) 1999-2001, 2003 Bruno Haible. + This file is not part of the GNU LIBICONV Library. + This file is put into the public domain. */ + +#include "iconv_string.h" +#include <iconv.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#define tmpbufsize 4096 + /* iconv_string: converts the bytes in [start,end) from FROMCODE to TOCODE. Pass 1 runs iconv into a tmpbufsize-byte scratch buffer only to count the output bytes; pass 2 allocates exactly that many bytes and converts into them. The byte count is stored in *lengthp when lengthp is not NULL. When resultp is not NULL the buffer is stored in *resultp (a NULL *resultp is allocated with malloc, anything else is passed to realloc); when resultp is NULL only the length is computed. Returns 0 on success, -1 with errno set on failure. */ +int iconv_string (const char* tocode, const char* fromcode, + const char* start, const char* end, + char** resultp, size_t* lengthp) +{ + iconv_t cd = iconv_open(tocode,fromcode); + size_t length; + char* result; + if (cd == (iconv_t)(-1)) { /* iconv_open failed; only EINVAL is examined further, because the autodetect_* pseudo-encodings are handled below by calling iconv_string again with real encoding names */ + if (errno != EINVAL) + return -1; + /* Unsupported fromcode or tocode. Check whether the caller requested + autodetection. */ + if (!strcmp(fromcode,"autodetect_utf8")) { + int ret; + /* Try UTF-8 first. There are very few ISO-8859-1 inputs that would + be valid UTF-8, but many UTF-8 inputs are valid ISO-8859-1. */ + ret = iconv_string(tocode,"UTF-8",start,end,resultp,lengthp); + if (!(ret < 0 && errno == EILSEQ)) /* any outcome other than an EILSEQ failure (success included) is final; only EILSEQ falls through to the next candidate encoding */ + return ret; + ret = iconv_string(tocode,"ISO-8859-1",start,end,resultp,lengthp); + return ret; + } + if (!strcmp(fromcode,"autodetect_jp")) { + int ret; + /* Try 7-bit encoding first. If the input contains bytes >= 0x80, + it will fail. */ + ret = iconv_string(tocode,"ISO-2022-JP-2",start,end,resultp,lengthp); + if (!(ret < 0 && errno == EILSEQ)) + return ret; + /* Try EUC-JP next. Short SHIFT_JIS inputs may come out wrong. This + is unavoidable. People will condemn SHIFT_JIS. + If we tried SHIFT_JIS first, then some short EUC-JP inputs would + come out wrong, and people would condemn EUC-JP and Unix, which + would not be good. 
*/ + ret = iconv_string(tocode,"EUC-JP",start,end,resultp,lengthp); + if (!(ret < 0 && errno == EILSEQ)) + return ret; + /* Finally try SHIFT_JIS. */ + ret = iconv_string(tocode,"SHIFT_JIS",start,end,resultp,lengthp); + return ret; + } + if (!strcmp(fromcode,"autodetect_kr")) { + int ret; + /* Try 7-bit encoding first. If the input contains bytes >= 0x80, + it will fail. */ + ret = iconv_string(tocode,"ISO-2022-KR",start,end,resultp,lengthp); + if (!(ret < 0 && errno == EILSEQ)) + return ret; + /* Finally try EUC-KR. */ + ret = iconv_string(tocode,"EUC-KR",start,end,resultp,lengthp); + return ret; + } + errno = EINVAL; + return -1; + } + /* Determine the length we need. */ + { + size_t count = 0; + char tmpbuf[tmpbufsize]; + const char* inptr = start; + size_t insize = end-start; + while (insize > 0) { + char* outptr = tmpbuf; + size_t outsize = tmpbufsize; + size_t res = iconv(cd,&inptr,&insize,&outptr,&outsize); + if (res == (size_t)(-1) && errno != E2BIG) { /* E2BIG is not treated as an error here: execution falls through to the count update and the next iteration starts again at the beginning of tmpbuf */ + if (errno == EINVAL) + break; /* NOTE(review): this break bypasses the count update below, so any bytes this call wrote to tmpbuf before reporting EINVAL are not counted -- confirm that pass 2 can never need more than length bytes */ + else { + int saved_errno = errno; + iconv_close(cd); + errno = saved_errno; + return -1; + } + } + count += outptr-tmpbuf; + } + { + char* outptr = tmpbuf; + size_t outsize = tmpbufsize; + size_t res = iconv(cd,NULL,NULL,&outptr,&outsize); + if (res == (size_t)(-1)) { + int saved_errno = errno; + iconv_close(cd); + errno = saved_errno; + return -1; + } + count += outptr-tmpbuf; + } + length = count; + } + if (lengthp != NULL) + *lengthp = length; + if (resultp == NULL) { + iconv_close(cd); + return 0; + } + result = (*resultp == NULL ? malloc(length) : realloc(*resultp,length)); + *resultp = result; /* NOTE(review): stored before the NULL check, so if realloc fails *resultp now holds NULL and the caller's previous buffer is no longer reachable through it */ + if (length == 0) { /* tested before the NULL check, so a zero-length result reports success whatever the allocator returned */ + iconv_close(cd); + return 0; + } + if (result == NULL) { + iconv_close(cd); + errno = ENOMEM; + return -1; + } + iconv(cd,NULL,NULL,NULL,NULL); /* return to the initial state */ + /* Do the conversion for real. 
*/ + { + const char* inptr = start; + size_t insize = end-start; + char* outptr = result; + size_t outsize = length; + while (insize > 0) { + size_t res = iconv(cd,&inptr,&insize,&outptr,&outsize); + if (res == (size_t)(-1)) { + if (errno == EINVAL) + break; + else { + int saved_errno = errno; + iconv_close(cd); + errno = saved_errno; + return -1; + } + } + } + { + size_t res = iconv(cd,NULL,NULL,&outptr,&outsize); + if (res == (size_t)(-1)) { + int saved_errno = errno; + iconv_close(cd); + errno = saved_errno; + return -1; + } + } + if (outsize != 0) abort(); /* pass 2 is expected to fill the buffer exactly; leftover space means the two passes disagreed about the output size */ + } + iconv_close(cd); + return 0; +} diff --git a/jni/iconv/extras/iconv_string.h b/jni/iconv/extras/iconv_string.h new file mode 100644 index 0000000..faab8c6 --- /dev/null +++ b/jni/iconv/extras/iconv_string.h @@ -0,0 +1,47 @@ +/* Copyright (C) 1999-2001 Bruno Haible. + This file is not part of the GNU LIBICONV Library. + This file is put into the public domain. */ + +/* + * This C function converts an entire string from one encoding to another, + * using iconv. Easier to use than iconv() itself, and supports autodetect + * encodings on input. + * + * int iconv_string (const char* tocode, const char* fromcode, + * const char* start, const char* end, + * char** resultp, size_t* lengthp) + * + * Converts a memory region given in encoding FROMCODE to a new memory + * region in encoding TOCODE. FROMCODE and TOCODE are as for iconv_open(3), + * except that FROMCODE may be one of the values + * "autodetect_utf8" supports ISO-8859-1 and UTF-8 + * "autodetect_jp" supports EUC-JP, ISO-2022-JP-2 and SHIFT_JIS + * "autodetect_kr" supports EUC-KR and ISO-2022-KR + * The input is in the memory region between start (inclusive) and end + * (exclusive). If resultp is not NULL, the output string is stored in + * *resultp; malloc/realloc is used to allocate the result. + * + * This function does not treat zero characters specially. + * + * Return value: 0 if successful, otherwise -1 and errno set. 
Particular + * errno values: EILSEQ and ENOMEM. + * + * Example: + * const char* s = ...; + * char* result = NULL; + * if (iconv_string("UCS-4-INTERNAL", "autodetect_utf8", + * s, s+strlen(s)+1, &result, NULL) < 0) + * perror("iconv_string"); + * + */ +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern int iconv_string (const char* tocode, const char* fromcode, const char* start, const char* end, char** resultp, size_t* lengthp); + +#ifdef __cplusplus +} +#endif |