3 files changed, 265 insertions, 0 deletions
diff --git a/jni/iconv/extras/ChangeLog b/jni/iconv/extras/ChangeLog
new file mode 100644
index 0000000..e74835b
--- /dev/null
+++ b/jni/iconv/extras/ChangeLog
@@ -0,0 +1,64 @@
+2003-05-10  Bruno Haible  <bruno@clisp.org>
+
+	* iconv_string.c (iconv_string): Don't return -1 just because the
+	string is longer than 4 KB.
+
+2002-02-13  Bruno Haible  <bruno@clisp.org>
+
+	* iconv.m4: Remove file. Obsoleted by m4/iconv.m4.
+
+2000-06-16  Bruno Haible  <haible@clisp.cons.org>
+
+        * iconv.m4: Change prefix to AM.
+
+2001-05-23  Bruno Haible  <haible@clisp.cons.org>
+
+        * iconv.m4 (jm_ICONV): Accept --with-libiconv-prefix option.
+
+2001-03-23  Bruno Haible  <haible@clisp.cons.org>
+
+        * iconv.m4 (jm_ICONV): Tweak printing of prototype.
+
+2001-03-20  Bruno Haible  <haible@clisp.cons.org>
+
+        * iconv.m4 (jm_ICONV): Recommend GNU libiconv.
+
+2001-01-03  Bruno Haible  <haible@clisp.cons.org>
+
+        * iconv.m4 (jm_ICONV): Also check whether the iconv declaration
+          has const.
+
+2000-02-02  Bruno Haible  <haible@clisp.cons.org>
+
+        * iconv.m4: New file.
+
+2001-01-29  Bruno Haible  <haible@clisp.cons.org>
+
+        * locale_charset.c: Remove file. Obsoleted by libcharset.
+
+2000-10-22  Bruno Haible  <haible@clisp.cons.org>
+
+        * locale_charset.c (get_locale_charset): Accept French and Spanish
+          names in both ISO-8859-1 and UTF-8.
+
+2000-08-24  Jim Blackson  <blackson@ontrack-japan.com>
+
+        * iconv_string.c (iconv_string): Fix return value for autodetect.
+
+2000-01-24  Bruno Haible  <haible@clisp.cons.org>
+
+        * iconv_string.c (iconv_string): Stop recognizing JOHAB.
+          Fix typo for EUC-JP.
+
+1999-12-18  Bruno Haible  <haible@clisp.cons.org>
+
+        * locale_charset.c (get_locale_charset): Recognize more language codes:
+          "af" (afrikaans), "ca" (catalan), "eu" (basque), "fo" (faeroese),
+          "ga" (irish), "gd" (scottish), "gl" (galician), "sq" (albanian),
+          "eo" (esperanto), "mt" (maltese), "be" (byelorussian),
+          "et" (estonian), "lt" (lithuanian), "lv" (latvian), "uk" (ukrainian).
+          Recognize more aliases: "english", "slovenian", "macedonian",
+          "serbian", "arabic".
+          Change default: KOI8-R for "ru" (russian) instead of ISO-8859-5,
+          ISO-8859-5 for "sr" instead of ISO-8859-2.
+
diff --git a/jni/iconv/extras/iconv_string.c b/jni/iconv/extras/iconv_string.c
new file mode 100644
index 0000000..f2c4b52
--- /dev/null
+++ b/jni/iconv/extras/iconv_string.c
@@ -0,0 +1,154 @@
+/* Copyright (C) 1999-2001, 2003 Bruno Haible.
+   This file is not part of the GNU LIBICONV Library.
+   This file is put into the public domain.  */
+
+#include "iconv_string.h"
+#include <iconv.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define tmpbufsize 4096
+
+/* Convert the bytes in [start,end) from encoding FROMCODE to TOCODE; the
+   full contract is described in iconv_string.h.
+   Two passes are made over the input: pass 1 converts into a scratch
+   buffer just to measure the output size, pass 2 converts into a heap
+   buffer of exactly that size.
+   Returns 0 on success, or -1 with errno set on failure.  */
+int iconv_string (const char* tocode, const char* fromcode,
+                  const char* start, const char* end,
+                  char** resultp, size_t* lengthp)
+{
+  iconv_t cd = iconv_open(tocode,fromcode);
+  size_t length;
+  size_t alloc_size;
+  char* result;
+  int retval = -1;
+  int saved_errno;
+  if (cd == (iconv_t)(-1)) {
+    if (errno != EINVAL)
+      return -1;
+    /* Unsupported fromcode or tocode. Check whether the caller requested
+       autodetection. */
+    if (!strcmp(fromcode,"autodetect_utf8")) {
+      int ret;
+      /* Try UTF-8 first. There are very few ISO-8859-1 inputs that would
+         be valid UTF-8, but many UTF-8 inputs are valid ISO-8859-1. */
+      ret = iconv_string(tocode,"UTF-8",start,end,resultp,lengthp);
+      if (!(ret < 0 && errno == EILSEQ))
+        return ret;
+      ret = iconv_string(tocode,"ISO-8859-1",start,end,resultp,lengthp);
+      return ret;
+    }
+    if (!strcmp(fromcode,"autodetect_jp")) {
+      int ret;
+      /* Try 7-bit encoding first. If the input contains bytes >= 0x80,
+         it will fail. */
+      ret = iconv_string(tocode,"ISO-2022-JP-2",start,end,resultp,lengthp);
+      if (!(ret < 0 && errno == EILSEQ))
+        return ret;
+      /* Try EUC-JP next. Short SHIFT_JIS inputs may come out wrong. This
+         is unavoidable. People will condemn SHIFT_JIS.
+         If we tried SHIFT_JIS first, then some short EUC-JP inputs would
+         come out wrong, and people would condemn EUC-JP and Unix, which
+         would not be good. */
+      ret = iconv_string(tocode,"EUC-JP",start,end,resultp,lengthp);
+      if (!(ret < 0 && errno == EILSEQ))
+        return ret;
+      /* Finally try SHIFT_JIS. */
+      ret = iconv_string(tocode,"SHIFT_JIS",start,end,resultp,lengthp);
+      return ret;
+    }
+    if (!strcmp(fromcode,"autodetect_kr")) {
+      int ret;
+      /* Try 7-bit encoding first. If the input contains bytes >= 0x80,
+         it will fail. */
+      ret = iconv_string(tocode,"ISO-2022-KR",start,end,resultp,lengthp);
+      if (!(ret < 0 && errno == EILSEQ))
+        return ret;
+      /* Finally try EUC-KR. */
+      ret = iconv_string(tocode,"EUC-KR",start,end,resultp,lengthp);
+      return ret;
+    }
+    errno = EINVAL;
+    return -1;
+  }
+  /* Pass 1: determine the length we need. */
+  {
+    size_t count = 0;
+    char tmpbuf[tmpbufsize];
+    const char* inptr = start;
+    size_t insize = end-start;
+    while (insize > 0) {
+      char* outptr = tmpbuf;
+      size_t outsize = tmpbufsize;
+      size_t res = iconv(cd,&inptr,&insize,&outptr,&outsize);
+      /* Count this call's output before looking at the error: iconv may
+         have converted complete characters before stopping at a truncated
+         one (EINVAL), and pass 2 will produce those bytes too. */
+      count += outptr-tmpbuf;
+      if (res == (size_t)(-1) && errno != E2BIG) {
+        if (errno == EINVAL)
+          break; /* incomplete sequence at the end: drop the tail */
+        goto done;
+      }
+    }
+    {
+      /* Flush: emit whatever returns the output to the initial state. */
+      char* outptr = tmpbuf;
+      size_t outsize = tmpbufsize;
+      if (iconv(cd,NULL,NULL,&outptr,&outsize) == (size_t)(-1))
+        goto done;
+      count += outptr-tmpbuf;
+    }
+    length = count;
+  }
+  if (lengthp != NULL)
+    *lengthp = length;
+  if (resultp == NULL)
+    goto success;
+  /* Never request 0 bytes: malloc(0) may legitimately return NULL and
+     realloc(p,0) may free p, so failure could not be told from success. */
+  alloc_size = (length > 0 ? length : 1);
+  result = (*resultp == NULL ? malloc(alloc_size)
+                             : realloc(*resultp,alloc_size));
+  if (result == NULL) {
+    /* A failed realloc leaves the caller's old buffer valid, so *resultp
+       is left untouched rather than overwritten with NULL (a leak). */
+    errno = ENOMEM;
+    goto done;
+  }
+  *resultp = result;
+  if (length == 0)
+    goto success;
+  iconv(cd,NULL,NULL,NULL,NULL); /* return to the initial state */
+  /* Pass 2: do the conversion for real. */
+  {
+    const char* inptr = start;
+    size_t insize = end-start;
+    char* outptr = result;
+    size_t outsize = length;
+    while (insize > 0) {
+      size_t res = iconv(cd,&inptr,&insize,&outptr,&outsize);
+      if (res == (size_t)(-1)) {
+        if (errno == EINVAL)
+          break;
+        goto done;
+      }
+    }
+    /* Flush the final shift sequence into the result, as pass 1 counted. */
+    if (iconv(cd,NULL,NULL,&outptr,&outsize) == (size_t)(-1))
+      goto done;
+    /* Both passes saw the same input, so the buffer must be exactly full. */
+    if (outsize != 0) abort();
+  }
+ success:
+  retval = 0;
+ done:
+  /* iconv_close may clobber errno; preserve the value the caller will see. */
+  saved_errno = errno;
+  iconv_close(cd);
+  errno = saved_errno;
+  return retval;
+}
diff --git a/jni/iconv/extras/iconv_string.h b/jni/iconv/extras/iconv_string.h
new file mode 100644
index 0000000..faab8c6
--- /dev/null
+++ b/jni/iconv/extras/iconv_string.h
@@ -0,0 +1,47 @@
+/* Copyright (C) 1999-2001 Bruno Haible.
+   This file is not part of the GNU LIBICONV Library.
+   This file is put into the public domain.  */
+
+/*
+ * This C function converts an entire string from one encoding to another,
+ * using iconv. Easier to use than iconv() itself, and supports autodetect
+ * encodings on input.
+ *
+ *   int iconv_string (const char* tocode, const char* fromcode,
+ *                     const char* start, const char* end,
+ *                     char** resultp, size_t* lengthp)
+ *
+ * Converts a memory region given in encoding FROMCODE to a new memory
+ * region in encoding TOCODE. FROMCODE and TOCODE are as for iconv_open(3),
+ * except that FROMCODE may be one of the values
+ *    "autodetect_utf8"          supports ISO-8859-1 and UTF-8
+ *    "autodetect_jp"            supports EUC-JP, ISO-2022-JP-2 and SHIFT_JIS
+ *    "autodetect_kr"            supports EUC-KR and ISO-2022-KR
+ * The input is in the memory region between start (inclusive) and end
+ * (exclusive). If resultp is not NULL, *resultp must be NULL or a malloc'ed
+ * block on entry; the output is stored in *resultp via malloc/realloc.
+ *
+ * This function does not treat zero characters specially.
+ *
+ * Return value: 0 if successful, otherwise -1 and errno set. Particular
+ * errno values: EILSEQ, ENOMEM, and EINVAL (unsupported encoding name).
+ *
+ * Example:
+ *   const char* s = ...;
+ *   char* result = NULL;
+ *   if (iconv_string("UCS-4-INTERNAL", "autodetect_utf8",
+ *                    s, s+strlen(s)+1, &result, NULL) < 0)
+ *     perror("iconv_string");
+ *
+ */
+#include <stddef.h>  /* size_t */
+
+#ifdef __cplusplus
+extern "C" {  /* keep the C symbol name when included from C++ */
+#endif
+extern int iconv_string (const char* tocode, const char* fromcode,
+                         const char* start, const char* end,
+                         char** resultp, size_t* lengthp);
+#ifdef __cplusplus
+}
+#endif