1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
|
#!/usr/bin/env ruby
# example:
# ./enc-emoji4unicode.rb emoji4unicode.xml > ../enc/trans/emoji-exchange-tbl.rb
require 'rexml/document'
require File.expand_path("../transcode-tblgen", __FILE__)
# Reads an emoji4unicode XML document and writes emoji conversion tables
# (as Ruby source text) for every requested carrier pair.
#
# A carrier name such as "DoCoMo", "KDDI", "SoftBank" or "Unicode" is
# downcased to obtain the attribute that is read from each <e> element.
class EmojiTable
  # Not referenced anywhere inside this class.
  VERBOSE_MODE = false

  # xml_path:: path of the emoji4unicode XML file to load.
  #
  # The document is parsed inside the File.open block so the file handle is
  # closed as soon as parsing finishes instead of staying open until GC.
  def initialize(xml_path)
    @doc = File.open(xml_path) { |xml| REXML::Document.new(xml) }
    @kddi_undoc = make_kddi_undoc_map
  end

  # Calls +block+ once per <e> element that has a usable code point for
  # +from_carrier+, passing a Hash with these keys:
  #
  # :from::     hex string of the UTF-8 bytes of the source code point
  # :to::       hex string of the UTF-8 bytes of the replacement
  # :comment::  human readable description of the mapping
  # :fallback:: true when the target attribute is empty and either the
  #             text_fallback attribute or U+3013 (GETA) is used instead
  # :proposal:: true when the target value starts with "+"
  #
  # Elements whose source attribute is empty or is not a single upper-case
  # hex number (after stripping a leading "*" or "+") are skipped.
  #
  # Raises RuntimeError when the target attribute is neither empty nor a
  # "+"-separated list of hex code points (optionally prefixed by ">" or "*").
  def conversion(from_carrier, to_carrier, &block)
    REXML::XPath.each(@doc.root, '//e') do |e|
      from = e.attribute(from_carrier.downcase).to_s
      to = e.attribute(to_carrier.downcase).to_s
      text_fallback = e.attribute('text_fallback').to_s
      name = e.attribute('name').to_s

      from = $1 if from =~ /^(?:\*|\+)(.+)$/ # proposed or unified
      next if from.empty? || from !~ /^[0-9A-F]+$/

      from_utf8 = ucs_to_utf8(from.hex)
      if to =~ /^(?:>|\*)?([0-9A-F\+]+)$/
        str_to = $1
        proposal = str_to.start_with?('+') # unicode "proposed" begins at "+"
        tos = str_to.sub(/^\+/, '').split('+')
        to_utf8 = tos.map(&:hex).pack("U*").unpack("H*").first
        comment = "[%s] U+%X -> %s" % [name, from.hex, tos.map { |c| "U+%X" % c.hex }.join(' ')]
        block.call(:from => from_utf8,
                   :to => to_utf8,
                   :comment => comment,
                   :fallback => false,
                   :proposal => proposal)
      elsif to.empty?
        if text_fallback.empty?
          # No replacement at all: fall back to the geta mark.
          comment = "[%s] U+%X -> U+3013 (GETA)" % [name, from.hex]
          block.call(:from => from_utf8,
                     :to => "\u{3013}".unpack("H*").first,
                     :comment => comment,
                     :fallback => true,
                     :proposal => false)
        else
          comment = %([%s] U+%X -> "%s") % [name, from.hex, text_fallback]
          block.call(:from => from_utf8,
                     :to => text_fallback.unpack("H*").first,
                     :comment => comment,
                     :fallback => true,
                     :proposal => false)
        end
      else
        raise "something wrong: %s -> %s" % [from, to]
      end
    end
  end

  # Writes the EMOJI_EXCHANGE_TBL entry for one carrier pair to +io+.
  #
  # io::           any object responding to +puts+
  # from_carrier:: source carrier name
  # to_carrier::   destination carrier name
  #
  # When +from_carrier+ is "KDDI" the rows are written twice: first keyed by
  # the code points found in the XML, then keyed by the code points looked up
  # in the map built by make_kddi_undoc_map.
  def generate(io, from_carrier, to_carrier)
    from_encoding = encoding_name(from_carrier)
    to_encoding = encoding_name(to_carrier)
    io.puts "EMOJI_EXCHANGE_TBL['#{from_encoding}']['#{to_encoding}'] = ["
    io.puts " # for documented codepoints" if from_carrier == "KDDI"
    conversion(from_carrier, to_carrier) do |params|
      io.puts table_row(params[:from], params)
    end
    if from_carrier == "KDDI"
      io.puts " # for undocumented codepoints"
      conversion(from_carrier, to_carrier) do |params|
        undoc = ucs_to_utf8(@kddi_undoc[utf8_to_ucs(params[:from])])
        io.puts table_row(undoc, params)
      end
    end
    io.puts "]"
    io.puts
  end

  private

  # Encoding label used in the generated table for +carrier+.
  def encoding_name(carrier)
    carrier == "Unicode" ? "UTF-8" : "UTF8-" + carrier
  end

  # Formats one table row. Fallback and proposed mappings are emitted as
  # :undef rather than as a concrete replacement string.
  def table_row(from_hex, params)
    to = (params[:fallback] || params[:proposal]) ? ":undef" : %Q{"#{params[:to]}"}
    %{ ["#{from_hex}", #{to}], # #{params[:comment]}}
  end

  # Hex string of UTF-8 bytes -> first code point as an Integer.
  def utf8_to_ucs(cp)
    [cp].pack("H*").unpack("U*").first
  end

  # Code point (Integer) -> hex string of its UTF-8 bytes.
  def ucs_to_utf8(cp)
    [cp].pack("U*").unpack("H*").first
  end

  # Builds a Hash that maps each first element of the
  # "UCS/EMOJI_SHIFT_JIS-KDDI" pairs to the second element of the
  # "EMOJI_SHIFT_JIS-KDDI-UNDOC/UCS" pair sharing the same middle value.
  #
  # NOTE(review): citrus_decode_mapsrc is defined outside this file; it is
  # assumed to return an array of two-element pairs -- confirm against
  # transcode-tblgen.
  #
  # Raises RuntimeError when the two sorted tables do not line up.
  def make_kddi_undoc_map
    pub_to_sjis = citrus_decode_mapsrc(
      "mskanji", 2, "UCS/EMOJI_SHIFT_JIS-KDDI").sort_by { |_u, s| s }
    sjis_to_undoc = citrus_decode_mapsrc(
      "mskanji", 2, "EMOJI_SHIFT_JIS-KDDI-UNDOC/UCS").sort_by { |s, _u| s }
    pub_to_sjis.zip(sjis_to_undoc).each_with_object({}) do |(pub, undoc), map|
      # undoc is nil when the second table is shorter than the first.
      raise "no match sjis codepoint" if undoc.nil? || pub[1] != undoc[0]
      map[pub[0]] = undoc[1]
    end
  end
end
# Command-line driver: expects the path of emoji4unicode.xml as the first
# argument and prints the generated tables to standard output.
if ARGV.length.zero?
  puts "usage: #$0 [emoji4unicode.xml]"
  exit 1
end

# Must be assigned before EmojiTable.new runs.
# NOTE(review): presumably read by helpers loaded from transcode-tblgen -- confirm.
$srcdir = File.expand_path("../../enc/trans", __FILE__)

table = EmojiTable.new(ARGV.first)
carriers = %w(DoCoMo KDDI SoftBank Unicode)
out = STDOUT

out.puts "EMOJI_EXCHANGE_TBL = Hash.new{|h,k| h[k] = {}}"

# Every ordered pair of distinct carriers, in the same order as two nested
# loops over the list would visit them.
carriers.product(carriers) do |source, target|
  table.generate(out, source, target) unless source == target
end
|