diff options
Diffstat (limited to 'jni/ruby/enc/trans/utf_16_32.c')
-rw-r--r-- | jni/ruby/enc/trans/utf_16_32.c | 847 |
1 files changed, 847 insertions, 0 deletions
diff --git a/jni/ruby/enc/trans/utf_16_32.c b/jni/ruby/enc/trans/utf_16_32.c new file mode 100644 index 0000000..73fa54e --- /dev/null +++ b/jni/ruby/enc/trans/utf_16_32.c @@ -0,0 +1,847 @@ +/* autogenerated. */ +/* src="transcode-tblgen.rb", len=28509, checksum=52888 */ +/* src="utf_16_32.trans", len=15308, checksum=28538 */ + +#include "transcode_data.h" + + + /* Byte table shared by every transcoder in this file. Each xxx_offsets macro is the index in this array where one sub-table starts; a sub-table is a (lowest byte, highest byte) pair followed by one entry per byte value in that range. NOTE(review): the entries look like indexes into the matching xxx_infos list of utf_16_32_word_array; confirm against transcode_data.h. */ +static const unsigned char +utf_16_32_byte_array[1288] = { +#define from_UTF_16LE_00toFF_D8toDB_00toFF_offsets 0 +220, 223, + 1, 1, 1, 1, + +#define from_UTF_16LE_00toFF_D8toDB_offsets 6 +0, 255, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + +#define from_UTF_16LE_00toFF_offsets 264 +0, 255, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + +#define from_UTF_32LE_00toFF_00toD7_00_offsets 522 +0, 0, + 0, + +#define from_UTF_32LE_00toFF_00toD7_offsets 525 +0, 16, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, + +#define from_UTF_32LE_00toFF_D8toDF_offsets 544 +1, 16, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + +#define from_UTF_32LE_00toFF_offsets 562 +0, 255, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + +#define from_UTF_32BE_00_offsets 820 +0, 16, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, + +#define from_UTF_8_C2toDF_offsets 839 +128, 191, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + +#define from_UTF_8_E0_offsets 905 +160, 191, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + +#define from_UTF_8_ED_offsets 939 +128, 159, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, + +#define from_UTF_8_F0_offsets 973 +144, 191, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + +#define from_UTF_8_F4_offsets 1023 +128, 143, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + +#define from_UTF_8_offsets 1041 +0, 244, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, + 6, 7, 7, 7, 8, + +}; /* Word table. Each named node occupies two words, an xxx_offsets index followed by an xxx_infos index, and each xxx_infos list holds the actions or child nodes for that node. NOTE(review): WORDINDEX2INFO, FUNso, FUNsi and INVALID are not defined in this file; confirm their encoding in transcode_data.h. */ +static const unsigned int +utf_16_32_word_array[106] = { +#define from_UTF_16LE_00toFF_D8toDB_00toFF_infos WORDINDEX2INFO(0) + INVALID, FUNso, + +#define from_UTF_16LE_00toFF_D8toDB_00toFF WORDINDEX2INFO(2) + from_UTF_16LE_00toFF_D8toDB_00toFF_offsets, + from_UTF_16LE_00toFF_D8toDB_00toFF_infos, + +#define from_UTF_16LE_00toFF_D8toDB_infos WORDINDEX2INFO(4) + from_UTF_16LE_00toFF_D8toDB_00toFF, + +#define from_UTF_16LE_00toFF_D8toDB WORDINDEX2INFO(5) + from_UTF_16LE_00toFF_D8toDB_offsets, + from_UTF_16LE_00toFF_D8toDB_infos, + +#define from_UTF_16LE_00toFF_infos WORDINDEX2INFO(7) + FUNso, from_UTF_16LE_00toFF_D8toDB, + INVALID, + +#define from_UTF_16LE_00toFF WORDINDEX2INFO(10) + from_UTF_16LE_00toFF_offsets, + from_UTF_16LE_00toFF_infos, + +#define from_UTF_16LE_infos WORDINDEX2INFO(12) + from_UTF_16LE_00toFF, + +#define from_UTF_16LE WORDINDEX2INFO(13) + 
from_UTF_16LE_00toFF_D8toDB_offsets, + from_UTF_16LE_infos, + +#define from_UTF_32LE_00toFF_00toD7_00_infos WORDINDEX2INFO(15) + FUNso, INVALID, + +#define from_UTF_32LE_00toFF_00toD7_00 WORDINDEX2INFO(17) + from_UTF_32LE_00toFF_00toD7_00_offsets, + from_UTF_32LE_00toFF_00toD7_00_infos, + +#define from_UTF_32LE_00toFF_00toD7_infos WORDINDEX2INFO(19) + from_UTF_32LE_00toFF_00toD7_00, INVALID, + +#define from_UTF_32LE_00toFF_00toD7 WORDINDEX2INFO(21) + from_UTF_32LE_00toFF_00toD7_offsets, + from_UTF_32LE_00toFF_00toD7_infos, + +#define from_UTF_32LE_00toFF_D8toDF_infos WORDINDEX2INFO(23) + INVALID, from_UTF_32LE_00toFF_00toD7_00, + +#define from_UTF_32LE_00toFF_D8toDF WORDINDEX2INFO(25) + from_UTF_32LE_00toFF_D8toDF_offsets, + from_UTF_32LE_00toFF_D8toDF_infos, + +#define from_UTF_32LE_00toFF_infos WORDINDEX2INFO(27) + from_UTF_32LE_00toFF_00toD7, from_UTF_32LE_00toFF_D8toDF, + +#define from_UTF_32LE_00toFF WORDINDEX2INFO(29) + from_UTF_32LE_00toFF_offsets, + from_UTF_32LE_00toFF_infos, + +#define from_UTF_32LE_infos WORDINDEX2INFO(31) + from_UTF_32LE_00toFF, + +#define from_UTF_32LE WORDINDEX2INFO(32) + from_UTF_16LE_00toFF_D8toDB_offsets, + from_UTF_32LE_infos, + +#define from_UTF_16BE_00toD7_infos WORDINDEX2INFO(34) + FUNso, + +#define from_UTF_16BE_00toD7 WORDINDEX2INFO(35) + from_UTF_16LE_00toFF_D8toDB_offsets, + from_UTF_16BE_00toD7_infos, + +#define from_UTF_16BE_D8toDB_00toFF_infos WORDINDEX2INFO(37) + INVALID, from_UTF_16BE_00toD7, + +#define from_UTF_16BE_D8toDB_00toFF WORDINDEX2INFO(39) + from_UTF_16LE_00toFF_D8toDB_00toFF_offsets, + from_UTF_16BE_D8toDB_00toFF_infos, + +#define from_UTF_16BE_D8toDB_infos WORDINDEX2INFO(41) + from_UTF_16BE_D8toDB_00toFF, + +#define from_UTF_16BE_D8toDB WORDINDEX2INFO(42) + from_UTF_16LE_00toFF_D8toDB_offsets, + from_UTF_16BE_D8toDB_infos, + +#define from_UTF_16BE_infos WORDINDEX2INFO(44) + from_UTF_16BE_00toD7, from_UTF_16BE_D8toDB, + INVALID, + +#define from_UTF_16BE WORDINDEX2INFO(47) + from_UTF_16LE_00toFF_offsets, + 
from_UTF_16BE_infos, + +#define from_UTF_32BE_00_00_infos WORDINDEX2INFO(49) + from_UTF_16BE_00toD7, INVALID, + +#define from_UTF_32BE_00_00 WORDINDEX2INFO(51) + from_UTF_32LE_00toFF_offsets, + from_UTF_32BE_00_00_infos, + +#define from_UTF_32BE_00_01to10_infos WORDINDEX2INFO(53) + from_UTF_16BE_00toD7, + +#define from_UTF_32BE_00_01to10 WORDINDEX2INFO(54) + from_UTF_16LE_00toFF_D8toDB_offsets, + from_UTF_32BE_00_01to10_infos, + +#define from_UTF_32BE_00_infos WORDINDEX2INFO(56) + from_UTF_32BE_00_00, from_UTF_32BE_00_01to10, + INVALID, + +#define from_UTF_32BE_00 WORDINDEX2INFO(59) + from_UTF_32BE_00_offsets, + from_UTF_32BE_00_infos, + +#define from_UTF_32BE_infos WORDINDEX2INFO(61) + from_UTF_32BE_00, INVALID, + +#define from_UTF_32BE WORDINDEX2INFO(63) + from_UTF_32LE_00toFF_00toD7_00_offsets, + from_UTF_32BE_infos, + +#define from_UTF_16_00toFF_infos WORDINDEX2INFO(65) + FUNsi, + +#define from_UTF_16_00toFF WORDINDEX2INFO(66) + from_UTF_16LE_00toFF_D8toDB_offsets, + from_UTF_16_00toFF_infos, + +#define from_UTF_16_infos WORDINDEX2INFO(68) + from_UTF_16_00toFF, + +#define from_UTF_16 WORDINDEX2INFO(69) + from_UTF_16LE_00toFF_D8toDB_offsets, + from_UTF_16_infos, + +#define from_UTF_32_00toFF_infos WORDINDEX2INFO(71) + from_UTF_16, + +#define from_UTF_32_00toFF WORDINDEX2INFO(72) + from_UTF_16LE_00toFF_D8toDB_offsets, + from_UTF_32_00toFF_infos, + +#define from_UTF_32_infos WORDINDEX2INFO(74) + from_UTF_32_00toFF, + +#define from_UTF_32 WORDINDEX2INFO(75) + from_UTF_16LE_00toFF_D8toDB_offsets, + from_UTF_32_infos, + +#define from_UTF_8_C2toDF WORDINDEX2INFO(77) + from_UTF_8_C2toDF_offsets, + from_UTF_16LE_00toFF_D8toDB_00toFF_infos, + +#define from_UTF_8_E0_infos WORDINDEX2INFO(79) + INVALID, from_UTF_8_C2toDF, + +#define from_UTF_8_E0 WORDINDEX2INFO(81) + from_UTF_8_E0_offsets, + from_UTF_8_E0_infos, + +#define from_UTF_8_E1toEC WORDINDEX2INFO(83) + from_UTF_8_C2toDF_offsets, + from_UTF_8_E0_infos, + +#define from_UTF_8_ED WORDINDEX2INFO(85) + 
from_UTF_8_ED_offsets, + from_UTF_8_E0_infos, + +#define from_UTF_8_F0_infos WORDINDEX2INFO(87) + INVALID, from_UTF_8_E1toEC, + +#define from_UTF_8_F0 WORDINDEX2INFO(89) + from_UTF_8_F0_offsets, + from_UTF_8_F0_infos, + +#define from_UTF_8_F1toF3 WORDINDEX2INFO(91) + from_UTF_8_C2toDF_offsets, + from_UTF_8_F0_infos, + +#define from_UTF_8_F4 WORDINDEX2INFO(93) + from_UTF_8_F4_offsets, + from_UTF_8_F0_infos, + +#define from_UTF_8_infos WORDINDEX2INFO(95) + FUNso, INVALID, + from_UTF_8_C2toDF, from_UTF_8_E0, + from_UTF_8_E1toEC, from_UTF_8_ED, + from_UTF_8_F0, from_UTF_8_F1toF3, + from_UTF_8_F4, + +#define from_UTF_8 WORDINDEX2INFO(104) + from_UTF_8_offsets, + from_UTF_8_infos, + +}; +#define TRANSCODE_TABLE_INFO utf_16_32_byte_array, 1288, utf_16_32_word_array, 106, ((int)sizeof(unsigned int)) + + /* Converts the UTF-16BE character at s to UTF-8 in o and returns the number of bytes written: 1 when the high byte is 0 and the low byte is below 0x80, otherwise 2 when the high byte is below 0x08, otherwise 3 when the high byte is outside 0xD8..0xDF, otherwise 4 for a surrogate pair read from s[0..3]. statep, l and osize are not used. */ +static ssize_t +fun_so_from_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) +{ + if (!s[0] && s[1]<0x80) { + o[0] = s[1]; + return 1; + } + else if (s[0]<0x08) { + o[0] = 0xC0 | (s[0]<<2) | (s[1]>>6); + o[1] = 0x80 | (s[1]&0x3F); + return 2; + } + else if ((s[0]&0xF8)!=0xD8) { + o[0] = 0xE0 | (s[0]>>4); + o[1] = 0x80 | ((s[0]&0x0F)<<2) | (s[1]>>6); + o[2] = 0x80 | (s[1]&0x3F); + return 3; + } + else { + unsigned int u = (((s[0]&0x03)<<2)|(s[1]>>6)) + 1; + o[0] = 0xF0 | (u>>2); + o[1] = 0x80 | ((u&0x03)<<4) | ((s[1]>>2)&0x0F); + o[2] = 0x80 | ((s[1]&0x03)<<4) | ((s[2]&0x03)<<2) | (s[3]>>6); + o[3] = 0x80 | (s[3]&0x3F); + return 4; + } +} + /* Converts the UTF-8 sequence at s to UTF-16BE in o. Lead bytes of one-, two- and three-byte sequences produce a single 16-bit unit and return 2; any other lead byte is handled as a four-byte sequence, producing a surrogate pair and returning 4. statep, l and osize are not used. */ +static ssize_t +fun_so_to_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) +{ + if (!(s[0]&0x80)) { + o[0] = 0x00; + o[1] = s[0]; + return 2; + } + else if ((s[0]&0xE0)==0xC0) { + o[0] = (s[0]>>2)&0x07; + o[1] = ((s[0]&0x03)<<6) | (s[1]&0x3F); + return 2; + } + else if ((s[0]&0xF0)==0xE0) { + o[0] = (s[0]<<4) | ((s[1]>>2)^0x20); + o[1] = (s[1]<<6) | (s[2]^0x80); + return 2; + } + else { + int w = (((s[0]&0x07)<<2) | ((s[1]>>4)&0x03)) - 1; + o[0] = 0xD8 | 
(w>>2); + o[1] = (w<<6) | ((s[1]&0x0F)<<2) | ((s[2]>>4)-8); + o[2] = 0xDC | ((s[2]>>2)&0x03); + o[3] = (s[2]<<6) | (s[3]&~0x80); + return 4; + } +} + /* Little-endian form of fun_so_from_utf_16be: the same four branches, with the high byte of each 16-bit unit read from the odd index (s[1], s[3]) and the low byte from the even index. Returns the number of UTF-8 bytes written (1 to 4). */ +static ssize_t +fun_so_from_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) +{ + if (!s[1] && s[0]<0x80) { + o[0] = s[0]; + return 1; + } + else if (s[1]<0x08) { + o[0] = 0xC0 | (s[1]<<2) | (s[0]>>6); + o[1] = 0x80 | (s[0]&0x3F); + return 2; + } + else if ((s[1]&0xF8)!=0xD8) { + o[0] = 0xE0 | (s[1]>>4); + o[1] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6); + o[2] = 0x80 | (s[0]&0x3F); + return 3; + } + else { + unsigned int u = (((s[1]&0x03)<<2)|(s[0]>>6)) + 1; + o[0] = 0xF0 | u>>2; + o[1] = 0x80 | ((u&0x03)<<4) | ((s[0]>>2)&0x0F); + o[2] = 0x80 | ((s[0]&0x03)<<4) | ((s[3]&0x03)<<2) | (s[2]>>6); + o[3] = 0x80 | (s[2]&0x3F); + return 4; + } +} + /* Little-endian form of fun_so_to_utf_16be: computes the same byte values but stores each 16-bit unit low byte first (o[0]/o[1] and o[2]/o[3] swapped). Returns 2, or 4 for a surrogate pair. */ +static ssize_t +fun_so_to_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) +{ + if (!(s[0]&0x80)) { + o[1] = 0x00; + o[0] = s[0]; + return 2; + } + else if ((s[0]&0xE0)==0xC0) { + o[1] = (s[0]>>2)&0x07; + o[0] = ((s[0]&0x03)<<6) | (s[1]&0x3F); + return 2; + } + else if ((s[0]&0xF0)==0xE0) { + o[1] = (s[0]<<4) | ((s[1]>>2)^0x20); + o[0] = (s[1]<<6) | (s[2]^0x80); + return 2; + } + else { + int w = (((s[0]&0x07)<<2) | ((s[1]>>4)&0x03)) - 1; + o[1] = 0xD8 | (w>>2); + o[0] = (w<<6) | ((s[1]&0x0F)<<2) | ((s[2]>>4)-8); + o[3] = 0xDC | ((s[2]>>2)&0x03); + o[2] = (s[2]<<6) | (s[3]&~0x80); + return 4; + } +} + /* Converts the UTF-32BE character at s to UTF-8 in o and returns the number of bytes written. s[0] is never read. When s[1] is 0 the 1-, 2- or 3-byte form is chosen from s[2] and s[3]; a non-zero s[1] selects the 4-byte form. */ +static ssize_t +fun_so_from_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) +{ + if (!s[1]) { + if (s[2]==0 && s[3]<0x80) { + o[0] = s[3]; + return 1; + } + else if (s[2]<0x08) { + o[0] = 0xC0 | (s[2]<<2) | (s[3]>>6); + o[1] = 0x80 | (s[3]&0x3F); + return 2; + } + else { + o[0] = 0xE0 | (s[2]>>4); + o[1] = 0x80 | ((s[2]&0x0F)<<2) | (s[3]>>6); + o[2] = 0x80 | (s[3]&0x3F); + return 3; + } + } + else { + o[0] = 0xF0 | (s[1]>>2); + o[1] = 0x80 | ((s[1]&0x03)<<4) | 
(s[2]>>4); + o[2] = 0x80 | ((s[2]&0x0F)<<2) | (s[3]>>6); + o[3] = 0x80 | (s[3]&0x3F); + return 4; + } +} + /* Converts the UTF-8 sequence at s to one UTF-32BE unit in o. o[0] is always written as 0 and the return value is always 4; the branch taken depends only on the lead byte s[0]. */ +static ssize_t +fun_so_to_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) +{ + o[0] = 0; + if (!(s[0]&0x80)) { + o[1] = o[2] = 0x00; + o[3] = s[0]; + } + else if ((s[0]&0xE0)==0xC0) { + o[1] = 0x00; + o[2] = (s[0]>>2)&0x07; + o[3] = ((s[0]&0x03)<<6) | (s[1]&0x3F); + } + else if ((s[0]&0xF0)==0xE0) { + o[1] = 0x00; + o[2] = (s[0]<<4) | ((s[1]>>2)^0x20); + o[3] = (s[1]<<6) | (s[2]^0x80); + } + else { + o[1] = ((s[0]&0x07)<<2) | ((s[1]>>4)&0x03); + o[2] = ((s[1]&0x0F)<<4) | ((s[2]>>2)&0x0F); + o[3] = ((s[2]&0x03)<<6) | (s[3]&0x3F); + } + return 4; +} + /* Little-endian form of fun_so_from_utf_32be: s[3] is never read, s[2] == 0 selects the 1- to 3-byte forms from s[1] and s[0], and a non-zero s[2] selects the 4-byte form. Returns the number of UTF-8 bytes written. */ +static ssize_t +fun_so_from_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) +{ + if (!s[2]) { + if (s[1]==0 && s[0]<0x80) { + o[0] = s[0]; + return 1; + } + else if (s[1]<0x08) { + o[0] = 0xC0 | (s[1]<<2) | (s[0]>>6); + o[1] = 0x80 | (s[0]&0x3F); + return 2; + } + else { + o[0] = 0xE0 | (s[1]>>4); + o[1] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6); + o[2] = 0x80 | (s[0]&0x3F); + return 3; + } + } + else { + o[0] = 0xF0 | (s[2]>>2); + o[1] = 0x80 | ((s[2]&0x03)<<4) | (s[1]>>4); + o[2] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6); + o[3] = 0x80 | (s[0]&0x3F); + return 4; + } +} + /* Little-endian form of fun_so_to_utf_32be: the same values are stored in reverse byte order, so o[3] is always 0. Always returns 4. */ +static ssize_t +fun_so_to_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) +{ + o[3] = 0; + if (!(s[0]&0x80)) { + o[2] = o[1] = 0x00; + o[0] = s[0]; + } + else if ((s[0]&0xE0)==0xC0) { + o[2] = 0x00; + o[1] = (s[0]>>2)&0x07; + o[0] = ((s[0]&0x03)<<6) | (s[1]&0x3F); + } + else if ((s[0]&0xF0)==0xE0) { + o[2] = 0x00; + o[1] = (s[0]<<4) | ((s[1]>>2)^0x20); + o[0] = (s[1]<<6) | (s[2]^0x80); + } + else { + o[2] = ((s[0]&0x07)<<2) | ((s[1]>>4)&0x03); + o[1] = ((s[1]&0x0F)<<4) | ((s[2]>>2)&0x0F); + o[0] = ((s[2]&0x03)<<6) | (s[3]&0x3F); + } + return 4; +} + /* Sets the one-byte converter state at statep to 0 and returns 0. In the decoders below, state 0 is the state in which a byte order mark is still expected. */ +static int +state_init(void *statep) +{ + unsigned char *sp = statep; + *sp = 0; 
+ return 0; +} + /* Picks the action for one UTF-16 unit from the state byte. State 0: FE FF sets the state to BE and FF FE sets it to LE, both returning ZERObt; any other input falls through to INVALID. State BE or LE: a high byte outside 0xD8..0xDF returns FUNso, a high byte of 0xD8..0xDB returns a node of the word table, and 0xDC..0xDF falls through to INVALID. The BE and LE macros defined here are reused by the functions that follow. */ +static VALUE +fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l) +{ + #define BE 1 + #define LE 2 + unsigned char *sp = statep; + switch (*sp) { + case 0: + if (s[0] == 0xFE && s[1] == 0xFF) { + *sp = BE; + return ZERObt; + } + else if (s[0] == 0xFF && s[1] == 0xFE) { + *sp = LE; + return ZERObt; + } + break; + case BE: + if (s[0] < 0xD8 || 0xDF < s[0]) { + return (VALUE)FUNso; + } + else if (s[0] <= 0xDB) { + return (VALUE)from_UTF_16BE_D8toDB_00toFF; + } + break; + case LE: + if (s[1] < 0xD8 || 0xDF < s[1]) { + return (VALUE)FUNso; + } + else if (s[1] <= 0xDB) { + return (VALUE)from_UTF_16LE_00toFF_D8toDB; + } + break; + } + return (VALUE)INVALID; +} + /* Forwards to fun_so_from_utf_16be or fun_so_from_utf_16le according to the state byte; returns 0 (no output) for any other state. */ +static ssize_t +fun_so_from_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) +{ + unsigned char *sp = statep; + switch (*sp) { + case BE: + return fun_so_from_utf_16be(statep, s, l, o, osize); + case LE: + return fun_so_from_utf_16le(statep, s, l, o, osize); + } + return 0; +} + /* Picks the action for one UTF-32 unit from the state byte. State 0: 00 00 FE FF sets the state to BE and FF FE 00 00 sets it to LE, both returning ZERObt. State BE or LE: returns FUNso when the most significant byte is 0 and either the next byte is 0x01..0x10, or it is 0 and the byte after it is outside 0xD8..0xDF. Everything else returns INVALID. */ +static VALUE +fun_si_from_utf_32(void *statep, const unsigned char *s, size_t l) +{ + unsigned char *sp = statep; + switch (*sp) { + case 0: + if (s[0] == 0 && s[1] == 0 && s[2] == 0xFE && s[3] == 0xFF) { + *sp = BE; + return ZERObt; + } + else if (s[0] == 0xFF && s[1] == 0xFE && s[2] == 0 && s[3] == 0) { + *sp = LE; + return ZERObt; + } + break; + case BE: + if (s[0] == 0 && ((0 < s[1] && s[1] <= 0x10) || + (s[1] == 0 && (s[2] < 0xD8 || 0xDF < s[2])))) + return (VALUE)FUNso; + break; + case LE: + if (s[3] == 0 && ((0 < s[2] && s[2] <= 0x10) || + (s[2] == 0 && (s[1] < 0xD8 || 0xDF < s[1])))) + return (VALUE)FUNso; + break; + } + return (VALUE)INVALID; +} + /* Forwards to fun_so_from_utf_32be or fun_so_from_utf_32le according to the state byte; returns 0 (no output) for any other state. */ +static ssize_t +fun_so_from_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) +{ + unsigned char *sp = statep; + switch (*sp) { + case BE: + return fun_so_from_utf_32be(statep, s, l, o, osize); + case LE: + return fun_so_from_utf_32le(statep, s, l, o, osize); + } + return 0; +} 
+ /* UTF-8 to UTF-16 with a byte order mark. While the state byte is 0 it first writes FE FF, sets the state to 1 and returns 2 plus the result of fun_so_to_utf_16be; on later calls it only forwards to fun_so_to_utf_16be. NOTE(review): the first call can therefore return up to 6, while rb_to_UTF_16 declares max_output 4; confirm the caller's output buffer allows this. */ +static ssize_t +fun_so_to_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) +{ + unsigned char *sp = statep; + if (*sp == 0) { + *o++ = 0xFE; + *o++ = 0xFF; + *sp = 1; + return 2 + fun_so_to_utf_16be(statep, s, l, o, osize); + } + return fun_so_to_utf_16be(statep, s, l, o, osize); +} + /* UTF-8 to UTF-32 with a byte order mark. While the state byte is 0 it first writes 00 00 FE FF, sets the state to 1 and returns 4 plus the result of fun_so_to_utf_32be; on later calls it only forwards to fun_so_to_utf_32be. NOTE(review): the first call returns 8, while rb_to_UTF_32 declares max_output 4; confirm the caller's output buffer allows this. */ +static ssize_t +fun_so_to_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) +{ + unsigned char *sp = statep; + if (*sp == 0) { + *o++ = 0x00; + *o++ = 0x00; + *o++ = 0xFE; + *o++ = 0xFF; + *sp = 1; + return 4 + fun_so_to_utf_32be(statep, s, l, o, osize); + } + return fun_so_to_utf_32be(statep, s, l, o, osize); +} + /* Transcoder descriptors. Each initializer gives the source encoding name, the destination encoding name, the root node in the word table, TRANSCODE_TABLE_INFO, the sizes and state triple labelled inline, and four function slots whose last entry is the fun_so converter. NOTE(review): the meaning of the unlabelled slots comes from rb_transcoder in transcode_data.h, which is not shown here. */ +static const rb_transcoder +rb_from_UTF_16BE = { + "UTF-16BE", "UTF-8", from_UTF_16BE, + TRANSCODE_TABLE_INFO, + 2, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + asciicompat_decoder, /* asciicompat_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, fun_so_from_utf_16be +}; + +static const rb_transcoder +rb_to_UTF_16BE = { + "UTF-8", "UTF-16BE", from_UTF_8, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + asciicompat_encoder, /* asciicompat_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, fun_so_to_utf_16be +}; + +static const rb_transcoder +rb_from_UTF_16LE = { + "UTF-16LE", "UTF-8", from_UTF_16LE, + TRANSCODE_TABLE_INFO, + 2, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + asciicompat_decoder, /* asciicompat_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, fun_so_from_utf_16le +}; + +static const rb_transcoder +rb_to_UTF_16LE = { + "UTF-8", "UTF-16LE", from_UTF_8, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + asciicompat_encoder, /* asciicompat_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, 
fun_so_to_utf_16le +}; + +static const rb_transcoder +rb_from_UTF_32BE = { + "UTF-32BE", "UTF-8", from_UTF_32BE, + TRANSCODE_TABLE_INFO, + 4, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + asciicompat_decoder, /* asciicompat_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, fun_so_from_utf_32be +}; + +static const rb_transcoder +rb_to_UTF_32BE = { + "UTF-8", "UTF-32BE", from_UTF_8, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + asciicompat_encoder, /* asciicompat_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, fun_so_to_utf_32be +}; + +static const rb_transcoder +rb_from_UTF_32LE = { + "UTF-32LE", "UTF-8", from_UTF_32LE, + TRANSCODE_TABLE_INFO, + 4, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + asciicompat_decoder, /* asciicompat_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, fun_so_from_utf_32le +}; + +static const rb_transcoder +rb_to_UTF_32LE = { + "UTF-8", "UTF-32LE", from_UTF_8, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + asciicompat_encoder, /* asciicompat_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, fun_so_to_utf_32le +}; + /* The four descriptors below handle the endian-neutral names "UTF-16" and "UTF-32". They use a 1-byte state initialised by state_init; the two decoders also supply a fun_si callback in the second function slot. */ +static const rb_transcoder +rb_from_UTF_16 = { + "UTF-16", "UTF-8", from_UTF_16, + TRANSCODE_TABLE_INFO, + 2, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + asciicompat_decoder, /* asciicompat_type */ + 1, state_init, NULL, /* state_size, state_init, state_fini */ + NULL, fun_si_from_utf_16, NULL, fun_so_from_utf_16 +}; + +static const rb_transcoder +rb_from_UTF_32 = { + "UTF-32", "UTF-8", from_UTF_32, + TRANSCODE_TABLE_INFO, + 4, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + asciicompat_decoder, /* asciicompat_type */ + 1, state_init, NULL, /* state_size, state_init, state_fini */ + NULL, 
fun_si_from_utf_32, NULL, fun_so_from_utf_32 +}; + +static const rb_transcoder +rb_to_UTF_16 = { + "UTF-8", "UTF-16", from_UTF_8, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + asciicompat_encoder, /* asciicompat_type */ + 1, state_init, NULL, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, fun_so_to_utf_16 +}; + +static const rb_transcoder +rb_to_UTF_32 = { + "UTF-8", "UTF-32", from_UTF_8, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + asciicompat_encoder, /* asciicompat_type */ + 1, state_init, NULL, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, fun_so_to_utf_32 +}; + /* Registers the twelve transcoder descriptors defined above via rb_register_transcoder. */ +TRANS_INIT(utf_16_32) +{ + rb_register_transcoder(&rb_from_UTF_16BE); + rb_register_transcoder(&rb_to_UTF_16BE); + rb_register_transcoder(&rb_from_UTF_16LE); + rb_register_transcoder(&rb_to_UTF_16LE); + rb_register_transcoder(&rb_from_UTF_32BE); + rb_register_transcoder(&rb_to_UTF_32BE); + rb_register_transcoder(&rb_from_UTF_32LE); + rb_register_transcoder(&rb_to_UTF_32LE); + rb_register_transcoder(&rb_from_UTF_16); + rb_register_transcoder(&rb_to_UTF_16); + rb_register_transcoder(&rb_from_UTF_32); + rb_register_transcoder(&rb_to_UTF_32); +} + |