From fcbf63e62c627deae76c1b8cb8c0876c536ed811 Mon Sep 17 00:00:00 2001 From: Jari Vetoniemi Date: Mon, 16 Mar 2020 18:49:26 +0900 Subject: Fresh start --- jni/iconv/extras/iconv_string.c | 154 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 jni/iconv/extras/iconv_string.c (limited to 'jni/iconv/extras/iconv_string.c') diff --git a/jni/iconv/extras/iconv_string.c b/jni/iconv/extras/iconv_string.c new file mode 100644 index 0000000..f2c4b52 --- /dev/null +++ b/jni/iconv/extras/iconv_string.c @@ -0,0 +1,154 @@ +/* Copyright (C) 1999-2001, 2003 Bruno Haible. + This file is not part of the GNU LIBICONV Library. + This file is put into the public domain. */ + +#include "iconv_string.h" +#include +#include +#include +#include + +#define tmpbufsize 4096 + +int iconv_string (const char* tocode, const char* fromcode, + const char* start, const char* end, + char** resultp, size_t* lengthp) +{ + iconv_t cd = iconv_open(tocode,fromcode); + size_t length; + char* result; + if (cd == (iconv_t)(-1)) { + if (errno != EINVAL) + return -1; + /* Unsupported fromcode or tocode. Check whether the caller requested + autodetection. */ + if (!strcmp(fromcode,"autodetect_utf8")) { + int ret; + /* Try UTF-8 first. There are very few ISO-8859-1 inputs that would + be valid UTF-8, but many UTF-8 inputs are valid ISO-8859-1. */ + ret = iconv_string(tocode,"UTF-8",start,end,resultp,lengthp); + if (!(ret < 0 && errno == EILSEQ)) + return ret; + ret = iconv_string(tocode,"ISO-8859-1",start,end,resultp,lengthp); + return ret; + } + if (!strcmp(fromcode,"autodetect_jp")) { + int ret; + /* Try 7-bit encoding first. If the input contains bytes >= 0x80, + it will fail. */ + ret = iconv_string(tocode,"ISO-2022-JP-2",start,end,resultp,lengthp); + if (!(ret < 0 && errno == EILSEQ)) + return ret; + /* Try EUC-JP next. Short SHIFT_JIS inputs may come out wrong. This + is unavoidable. People will condemn SHIFT_JIS. + If we tried SHIFT_JIS first, then some short EUC-JP inputs would + come out wrong, and people would condemn EUC-JP and Unix, which + would not be good. */ + ret = iconv_string(tocode,"EUC-JP",start,end,resultp,lengthp); + if (!(ret < 0 && errno == EILSEQ)) + return ret; + /* Finally try SHIFT_JIS. */ + ret = iconv_string(tocode,"SHIFT_JIS",start,end,resultp,lengthp); + return ret; + } + if (!strcmp(fromcode,"autodetect_kr")) { + int ret; + /* Try 7-bit encoding first. If the input contains bytes >= 0x80, + it will fail. */ + ret = iconv_string(tocode,"ISO-2022-KR",start,end,resultp,lengthp); + if (!(ret < 0 && errno == EILSEQ)) + return ret; + /* Finally try EUC-KR. */ + ret = iconv_string(tocode,"EUC-KR",start,end,resultp,lengthp); + return ret; + } + errno = EINVAL; + return -1; + } + /* Determine the length we need. */ + { + size_t count = 0; + char tmpbuf[tmpbufsize]; + const char* inptr = start; + size_t insize = end-start; + while (insize > 0) { + char* outptr = tmpbuf; + size_t outsize = tmpbufsize; + size_t res = iconv(cd,&inptr,&insize,&outptr,&outsize); + if (res == (size_t)(-1) && errno != E2BIG) { + if (errno == EINVAL) + break; + else { + int saved_errno = errno; + iconv_close(cd); + errno = saved_errno; + return -1; + } + } + count += outptr-tmpbuf; + } + { + char* outptr = tmpbuf; + size_t outsize = tmpbufsize; + size_t res = iconv(cd,NULL,NULL,&outptr,&outsize); + if (res == (size_t)(-1)) { + int saved_errno = errno; + iconv_close(cd); + errno = saved_errno; + return -1; + } + count += outptr-tmpbuf; + } + length = count; + } + if (lengthp != NULL) + *lengthp = length; + if (resultp == NULL) { + iconv_close(cd); + return 0; + } + result = (*resultp == NULL ? malloc(length) : realloc(*resultp,length)); + *resultp = result; + if (length == 0) { + iconv_close(cd); + return 0; + } + if (result == NULL) { + iconv_close(cd); + errno = ENOMEM; + return -1; + } + iconv(cd,NULL,NULL,NULL,NULL); /* return to the initial state */ + /* Do the conversion for real. */ + { + const char* inptr = start; + size_t insize = end-start; + char* outptr = result; + size_t outsize = length; + while (insize > 0) { + size_t res = iconv(cd,&inptr,&insize,&outptr,&outsize); + if (res == (size_t)(-1)) { + if (errno == EINVAL) + break; + else { + int saved_errno = errno; + iconv_close(cd); + errno = saved_errno; + return -1; + } + } + } + { + size_t res = iconv(cd,NULL,NULL,&outptr,&outsize); + if (res == (size_t)(-1)) { + int saved_errno = errno; + iconv_close(cd); + errno = saved_errno; + return -1; + } + } + if (outsize != 0) abort(); + } + iconv_close(cd); + return 0; +} -- cgit v1.2.3