diff options
Diffstat (limited to 'jni/iconv/lib/loop_unicode.h')
-rw-r--r-- | jni/iconv/lib/loop_unicode.h | 527 |
1 files changed, 527 insertions, 0 deletions
diff --git a/jni/iconv/lib/loop_unicode.h b/jni/iconv/lib/loop_unicode.h new file mode 100644 index 0000000..1c787b5 --- /dev/null +++ b/jni/iconv/lib/loop_unicode.h @@ -0,0 +1,527 @@ +/* + * Copyright (C) 1999-2003, 2005-2006, 2008 Free Software Foundation, Inc. + * This file is part of the GNU LIBICONV Library. + * + * The GNU LIBICONV Library is free software; you can redistribute it + * and/or modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * The GNU LIBICONV Library is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with the GNU LIBICONV Library; see the file COPYING.LIB. + * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, + * Fifth Floor, Boston, MA 02110-1301, USA. + */ + +/* This file defines the conversion loop via Unicode as a pivot encoding. */ + +/* Attempt to transliterate wc. Return code as in xxx_wctomb. */ +static int unicode_transliterate (conv_t cd, ucs4_t wc, + unsigned char* outptr, size_t outleft) +{ + if (cd->oflags & HAVE_HANGUL_JAMO) { + /* Decompose Hangul into Jamo. Use double-width Jamo (contained + in all Korean encodings and ISO-2022-JP-2), not half-width Jamo + (contained in Unicode only). */ + ucs4_t buf[3]; + int ret = johab_hangul_decompose(cd,buf,wc); + if (ret != RET_ILUNI) { + /* we know 1 <= ret <= 3 */ + state_t backup_state = cd->ostate; + unsigned char* backup_outptr = outptr; + size_t backup_outleft = outleft; + int i, sub_outcount; + for (i = 0; i < ret; i++) { + if (outleft == 0) { + sub_outcount = RET_TOOSMALL; + goto johab_hangul_failed; + } + sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft); + if (sub_outcount <= RET_ILUNI) + goto johab_hangul_failed; + if (!(sub_outcount <= outleft)) abort(); + outptr += sub_outcount; outleft -= sub_outcount; + } + return outptr-backup_outptr; + johab_hangul_failed: + cd->ostate = backup_state; + outptr = backup_outptr; + outleft = backup_outleft; + if (sub_outcount != RET_ILUNI) + return RET_TOOSMALL; + } + } + { + /* Try to use a variant, but postfix it with + U+303E IDEOGRAPHIC VARIATION INDICATOR + (cf. Ken Lunde's "CJKV information processing", p. 188). */ + int indx = -1; + if (wc == 0x3006) + indx = 0; + else if (wc == 0x30f6) + indx = 1; + else if (wc >= 0x4e00 && wc < 0xa000) + indx = cjk_variants_indx[wc-0x4e00]; + if (indx >= 0) { + for (;; indx++) { + ucs4_t buf[2]; + unsigned short variant = cjk_variants[indx]; + unsigned short last = variant & 0x8000; + variant &= 0x7fff; + variant += 0x3000; + buf[0] = variant; buf[1] = 0x303e; + { + state_t backup_state = cd->ostate; + unsigned char* backup_outptr = outptr; + size_t backup_outleft = outleft; + int i, sub_outcount; + for (i = 0; i < 2; i++) { + if (outleft == 0) { + sub_outcount = RET_TOOSMALL; + goto variant_failed; + } + sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft); + if (sub_outcount <= RET_ILUNI) + goto variant_failed; + if (!(sub_outcount <= outleft)) abort(); + outptr += sub_outcount; outleft -= sub_outcount; + } + return outptr-backup_outptr; + variant_failed: + cd->ostate = backup_state; + outptr = backup_outptr; + outleft = backup_outleft; + if (sub_outcount != RET_ILUNI) + return RET_TOOSMALL; + } + if (last) + break; + } + } + } + if (wc >= 0x2018 && wc <= 0x201a) { + /* Special case for quotation marks 0x2018, 0x2019, 0x201a */ + ucs4_t substitute = + (cd->oflags & HAVE_QUOTATION_MARKS + ? (wc == 0x201a ? 0x2018 : wc) + : (cd->oflags & HAVE_ACCENTS + ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */ + : 0x0027 /* use apostrophe */ + ) ); + int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft); + if (outcount != RET_ILUNI) + return outcount; + } + { + /* Use the transliteration table. */ + int indx = translit_index(wc); + if (indx >= 0) { + const unsigned int * cp = &translit_data[indx]; + unsigned int num = *cp++; + state_t backup_state = cd->ostate; + unsigned char* backup_outptr = outptr; + size_t backup_outleft = outleft; + unsigned int i; + int sub_outcount; + for (i = 0; i < num; i++) { + if (outleft == 0) { + sub_outcount = RET_TOOSMALL; + goto translit_failed; + } + sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft); + if (sub_outcount == RET_ILUNI) + /* Recursive transliteration. */ + sub_outcount = unicode_transliterate(cd,cp[i],outptr,outleft); + if (sub_outcount <= RET_ILUNI) + goto translit_failed; + if (!(sub_outcount <= outleft)) abort(); + outptr += sub_outcount; outleft -= sub_outcount; + } + return outptr-backup_outptr; + translit_failed: + cd->ostate = backup_state; + outptr = backup_outptr; + outleft = backup_outleft; + if (sub_outcount != RET_ILUNI) + return RET_TOOSMALL; + } + } + return RET_ILUNI; +} + +#ifndef LIBICONV_PLUG + +struct uc_to_mb_fallback_locals { + unsigned char* l_outbuf; + size_t l_outbytesleft; + int l_errno; +}; + +static void uc_to_mb_write_replacement (const char *buf, size_t buflen, + void* callback_arg) +{ + struct uc_to_mb_fallback_locals * plocals = + (struct uc_to_mb_fallback_locals *) callback_arg; + /* Do nothing if already encountered an error in a previous call. */ + if (plocals->l_errno == 0) { + /* Attempt to copy the passed buffer to the output buffer. */ + if (plocals->l_outbytesleft < buflen) + plocals->l_errno = E2BIG; + else { + memcpy(plocals->l_outbuf, buf, buflen); + plocals->l_outbuf += buflen; + plocals->l_outbytesleft -= buflen; + } + } +} + +struct mb_to_uc_fallback_locals { + conv_t l_cd; + unsigned char* l_outbuf; + size_t l_outbytesleft; + int l_errno; +}; + +static void mb_to_uc_write_replacement (const unsigned int *buf, size_t buflen, + void* callback_arg) +{ + struct mb_to_uc_fallback_locals * plocals = + (struct mb_to_uc_fallback_locals *) callback_arg; + /* Do nothing if already encountered an error in a previous call. */ + if (plocals->l_errno == 0) { + /* Attempt to convert the passed buffer to the target encoding. */ + conv_t cd = plocals->l_cd; + unsigned char* outptr = plocals->l_outbuf; + size_t outleft = plocals->l_outbytesleft; + for (; buflen > 0; buf++, buflen--) { + ucs4_t wc = *buf; + int outcount; + if (outleft == 0) { + plocals->l_errno = E2BIG; + break; + } + outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft); + if (outcount != RET_ILUNI) + goto outcount_ok; + /* Handle Unicode tag characters (range U+E0000..U+E007F). */ + if ((wc >> 7) == (0xe0000 >> 7)) + goto outcount_zero; + /* Try transliteration. */ + if (cd->transliterate) { + outcount = unicode_transliterate(cd,wc,outptr,outleft); + if (outcount != RET_ILUNI) + goto outcount_ok; + } + if (cd->discard_ilseq) { + outcount = 0; + goto outcount_ok; + } + #ifndef LIBICONV_PLUG + else if (cd->fallbacks.uc_to_mb_fallback != NULL) { + struct uc_to_mb_fallback_locals locals; + locals.l_outbuf = outptr; + locals.l_outbytesleft = outleft; + locals.l_errno = 0; + cd->fallbacks.uc_to_mb_fallback(wc, + uc_to_mb_write_replacement, + &locals, + cd->fallbacks.data); + if (locals.l_errno != 0) { + plocals->l_errno = locals.l_errno; + break; + } + outptr = locals.l_outbuf; + outleft = locals.l_outbytesleft; + outcount = 0; + goto outcount_ok; + } + #endif + outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft); + if (outcount != RET_ILUNI) + goto outcount_ok; + plocals->l_errno = EILSEQ; + break; + outcount_ok: + if (outcount < 0) { + plocals->l_errno = E2BIG; + break; + } + #ifndef LIBICONV_PLUG + if (cd->hooks.uc_hook) + (*cd->hooks.uc_hook)(wc, cd->hooks.data); + #endif + if (!(outcount <= outleft)) abort(); + outptr += outcount; outleft -= outcount; + outcount_zero: ; + } + plocals->l_outbuf = outptr; + plocals->l_outbytesleft = outleft; + } +} + +#endif /* !LIBICONV_PLUG */ + +static size_t unicode_loop_convert (iconv_t icd, + const char* * inbuf, size_t *inbytesleft, + char* * outbuf, size_t *outbytesleft) +{ + conv_t cd = (conv_t) icd; + size_t result = 0; + const unsigned char* inptr = (const unsigned char*) *inbuf; + size_t inleft = *inbytesleft; + unsigned char* outptr = (unsigned char*) *outbuf; + size_t outleft = *outbytesleft; + while (inleft > 0) { + state_t last_istate = cd->istate; + ucs4_t wc; + int incount; + int outcount; + incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft); + if (incount < 0) { + if ((unsigned int)(-1-incount) % 2 == (unsigned int)(-1-RET_ILSEQ) % 2) { + /* Case 1: invalid input, possibly after a shift sequence */ + incount = DECODE_SHIFT_ILSEQ(incount); + if (cd->discard_ilseq) { + switch (cd->iindex) { + case ei_ucs4: case ei_ucs4be: case ei_ucs4le: + case ei_utf32: case ei_utf32be: case ei_utf32le: + case ei_ucs4internal: case ei_ucs4swapped: + incount += 4; break; + case ei_ucs2: case ei_ucs2be: case ei_ucs2le: + case ei_utf16: case ei_utf16be: case ei_utf16le: + case ei_ucs2internal: case ei_ucs2swapped: + incount += 2; break; + default: + incount += 1; break; + } + goto outcount_zero; + } + #ifndef LIBICONV_PLUG + else if (cd->fallbacks.mb_to_uc_fallback != NULL) { + unsigned int incount2; + struct mb_to_uc_fallback_locals locals; + switch (cd->iindex) { + case ei_ucs4: case ei_ucs4be: case ei_ucs4le: + case ei_utf32: case ei_utf32be: case ei_utf32le: + case ei_ucs4internal: case ei_ucs4swapped: + incount2 = 4; break; + case ei_ucs2: case ei_ucs2be: case ei_ucs2le: + case ei_utf16: case ei_utf16be: case ei_utf16le: + case ei_ucs2internal: case ei_ucs2swapped: + incount2 = 2; break; + default: + incount2 = 1; break; + } + locals.l_cd = cd; + locals.l_outbuf = outptr; + locals.l_outbytesleft = outleft; + locals.l_errno = 0; + cd->fallbacks.mb_to_uc_fallback((const char*)inptr+incount, incount2, + mb_to_uc_write_replacement, + &locals, + cd->fallbacks.data); + if (locals.l_errno != 0) { + inptr += incount; inleft -= incount; + errno = locals.l_errno; + result = -1; + break; + } + incount += incount2; + outptr = locals.l_outbuf; + outleft = locals.l_outbytesleft; + result += 1; + goto outcount_zero; + } + #endif + inptr += incount; inleft -= incount; + errno = EILSEQ; + result = -1; + break; + } + if (incount == RET_TOOFEW(0)) { + /* Case 2: not enough bytes available to detect anything */ + errno = EINVAL; + result = -1; + break; + } + /* Case 3: k bytes read, but only a shift sequence */ + incount = DECODE_TOOFEW(incount); + } else { + /* Case 4: k bytes read, making up a wide character */ + if (outleft == 0) { + cd->istate = last_istate; + errno = E2BIG; + result = -1; + break; + } + outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft); + if (outcount != RET_ILUNI) + goto outcount_ok; + /* Handle Unicode tag characters (range U+E0000..U+E007F). */ + if ((wc >> 7) == (0xe0000 >> 7)) + goto outcount_zero; + /* Try transliteration. */ + result++; + if (cd->transliterate) { + outcount = unicode_transliterate(cd,wc,outptr,outleft); + if (outcount != RET_ILUNI) + goto outcount_ok; + } + if (cd->discard_ilseq) { + outcount = 0; + goto outcount_ok; + } + #ifndef LIBICONV_PLUG + else if (cd->fallbacks.uc_to_mb_fallback != NULL) { + struct uc_to_mb_fallback_locals locals; + locals.l_outbuf = outptr; + locals.l_outbytesleft = outleft; + locals.l_errno = 0; + cd->fallbacks.uc_to_mb_fallback(wc, + uc_to_mb_write_replacement, + &locals, + cd->fallbacks.data); + if (locals.l_errno != 0) { + cd->istate = last_istate; + errno = locals.l_errno; + return -1; + } + outptr = locals.l_outbuf; + outleft = locals.l_outbytesleft; + outcount = 0; + goto outcount_ok; + } + #endif + outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft); + if (outcount != RET_ILUNI) + goto outcount_ok; + cd->istate = last_istate; + errno = EILSEQ; + result = -1; + break; + outcount_ok: + if (outcount < 0) { + cd->istate = last_istate; + errno = E2BIG; + result = -1; + break; + } + #ifndef LIBICONV_PLUG + if (cd->hooks.uc_hook) + (*cd->hooks.uc_hook)(wc, cd->hooks.data); + #endif + if (!(outcount <= outleft)) abort(); + outptr += outcount; outleft -= outcount; + } + outcount_zero: + if (!(incount <= inleft)) abort(); + inptr += incount; inleft -= incount; + } + *inbuf = (const char*) inptr; + *inbytesleft = inleft; + *outbuf = (char*) outptr; + *outbytesleft = outleft; + return result; +} + +static size_t unicode_loop_reset (iconv_t icd, + char* * outbuf, size_t *outbytesleft) +{ + conv_t cd = (conv_t) icd; + if (outbuf == NULL || *outbuf == NULL) { + /* Reset the states. */ + memset(&cd->istate,'\0',sizeof(state_t)); + memset(&cd->ostate,'\0',sizeof(state_t)); + return 0; + } else { + size_t result = 0; + if (cd->ifuncs.xxx_flushwc) { + state_t last_istate = cd->istate; + ucs4_t wc; + if (cd->ifuncs.xxx_flushwc(cd, &wc)) { + unsigned char* outptr = (unsigned char*) *outbuf; + size_t outleft = *outbytesleft; + int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft); + if (outcount != RET_ILUNI) + goto outcount_ok; + /* Handle Unicode tag characters (range U+E0000..U+E007F). */ + if ((wc >> 7) == (0xe0000 >> 7)) + goto outcount_zero; + /* Try transliteration. */ + result++; + if (cd->transliterate) { + outcount = unicode_transliterate(cd,wc,outptr,outleft); + if (outcount != RET_ILUNI) + goto outcount_ok; + } + if (cd->discard_ilseq) { + outcount = 0; + goto outcount_ok; + } + #ifndef LIBICONV_PLUG + else if (cd->fallbacks.uc_to_mb_fallback != NULL) { + struct uc_to_mb_fallback_locals locals; + locals.l_outbuf = outptr; + locals.l_outbytesleft = outleft; + locals.l_errno = 0; + cd->fallbacks.uc_to_mb_fallback(wc, + uc_to_mb_write_replacement, + &locals, + cd->fallbacks.data); + if (locals.l_errno != 0) { + cd->istate = last_istate; + errno = locals.l_errno; + return -1; + } + outptr = locals.l_outbuf; + outleft = locals.l_outbytesleft; + outcount = 0; + goto outcount_ok; + } + #endif + outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft); + if (outcount != RET_ILUNI) + goto outcount_ok; + cd->istate = last_istate; + errno = EILSEQ; + return -1; + outcount_ok: + if (outcount < 0) { + cd->istate = last_istate; + errno = E2BIG; + return -1; + } + #ifndef LIBICONV_PLUG + if (cd->hooks.uc_hook) + (*cd->hooks.uc_hook)(wc, cd->hooks.data); + #endif + if (!(outcount <= outleft)) abort(); + outptr += outcount; + outleft -= outcount; + outcount_zero: + *outbuf = (char*) outptr; + *outbytesleft = outleft; + } + } + if (cd->ofuncs.xxx_reset) { + unsigned char* outptr = (unsigned char*) *outbuf; + size_t outleft = *outbytesleft; + int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft); + if (outcount < 0) { + errno = E2BIG; + return -1; + } + if (!(outcount <= outleft)) abort(); + *outbuf = (char*) (outptr + outcount); + *outbytesleft = outleft - outcount; + } + memset(&cd->istate,'\0',sizeof(state_t)); + memset(&cd->ostate,'\0',sizeof(state_t)); + return result; + } +} |