diff options
Diffstat (limited to 'jni/ruby/ext/nkf/lib')
-rw-r--r-- | jni/ruby/ext/nkf/lib/kconv.rb | 282 |
1 files changed, 282 insertions, 0 deletions
#
# kconv.rb - Kanji Converter.
#
# $Id: kconv.rb 30112 2010-12-07 11:47:39Z naruse $
#
# ----
#
# kconv.rb implements the Kconv module for Kanji conversion. Additionally,
# some methods are added to the String class to allow easy conversion.
#

require 'nkf'

#
# Kanji Converter for Ruby.
#
# Every conversion is delegated to NKF.nkf; the encoding predicates
# (+iseuc+, +issjis+, +isutf8+) only check byte-level validity of the
# string under the given encoding.
#
module Kconv
  #
  # Public Constants
  #

  # Constants of Encoding (all are re-exported from NKF)

  # Auto-Detect
  AUTO = NKF::AUTO
  # ISO-2022-JP
  JIS = NKF::JIS
  # EUC-JP
  EUC = NKF::EUC
  # Shift_JIS
  SJIS = NKF::SJIS
  # BINARY
  BINARY = NKF::BINARY
  # NOCONV
  NOCONV = NKF::NOCONV
  # ASCII
  ASCII = NKF::ASCII
  # UTF-8
  UTF8 = NKF::UTF8
  # UTF-16
  UTF16 = NKF::UTF16
  # UTF-32
  UTF32 = NKF::UTF32
  # UNKNOWN
  UNKNOWN = NKF::UNKNOWN

  #
  # Public Methods
  #

  # Every method defined below is a module function: callable as
  # Kconv.name(...) and available as a private instance method when
  # Kconv is included.
  module_function

  # call-seq:
  #    Kconv.kconv(str, to_enc, from_enc=nil)
  #
  # Convert <code>str</code> to <code>to_enc</code>.
  # <code>to_enc</code> and <code>from_enc</code> are given as constants of
  # Kconv or Encoding objects.
  #
  # A nil (or false) encoding is simply left out of the option string
  # handed to NKF.nkf, so no <code>--ic</code> / <code>--oc</code> option is
  # passed for it.
  def kconv(str, to_enc, from_enc = nil)
    opt = ''
    opt += " --ic=#{from_enc}" if from_enc
    opt += " --oc=#{to_enc}" if to_enc

    ::NKF.nkf(opt, str)
  end

  #
  # Encode to
  #

  # call-seq:
  #    Kconv.tojis(str)   => string
  #
  # Convert <code>str</code> to ISO-2022-JP
  def tojis(str)
    kconv(str, JIS)
  end

  # call-seq:
  #    Kconv.toeuc(str)   => string
  #
  # Convert <code>str</code> to EUC-JP
  def toeuc(str)
    kconv(str, EUC)
  end

  # call-seq:
  #    Kconv.tosjis(str)   => string
  #
  # Convert <code>str</code> to Shift_JIS
  def tosjis(str)
    kconv(str, SJIS)
  end

  # call-seq:
  #    Kconv.toutf8(str)   => string
  #
  # Convert <code>str</code> to UTF-8
  def toutf8(str)
    kconv(str, UTF8)
  end

  # call-seq:
  #    Kconv.toutf16(str)   => string
  #
  # Convert <code>str</code> to UTF-16
  def toutf16(str)
    kconv(str, UTF16)
  end

  # call-seq:
  #    Kconv.toutf32(str)   => string
  #
  # Convert <code>str</code> to UTF-32
  def toutf32(str)
    kconv(str, UTF32)
  end

  # call-seq:
  #    Kconv.tolocale(str)   => string
  #
  # Convert <code>str</code> to the locale encoding
  # (as reported by Encoding.locale_charmap).
  def tolocale(str)
    kconv(str, Encoding.locale_charmap)
  end

  #
  # guess
  #

  # call-seq:
  #    Kconv.guess(str)   => encoding
  #
  # Guess input encoding by NKF.guess
  def guess(str)
    ::NKF.guess(str)
  end

  #
  # isEncoding
  #

  # call-seq:
  #    Kconv.iseuc(str)   => true or false
  #
  # Returns whether input encoding is EUC-JP or not.
  #
  # *Note* do not expect the return value to be a MatchData.
  def iseuc(str)
    # Work on a copy so the caller's string keeps its encoding.
    str.dup.force_encoding(EUC).valid_encoding?
  end

  # call-seq:
  #    Kconv.issjis(str)   => true or false
  #
  # Returns whether input encoding is Shift_JIS or not.
  def issjis(str)
    # Work on a copy so the caller's string keeps its encoding.
    str.dup.force_encoding(SJIS).valid_encoding?
  end

  # call-seq:
  #    Kconv.isjis(str)   => true or false
  #
  # Returns whether input encoding is ISO-2022-JP or not.
  #
  # The string must consist of 7-bit text (tab, LF, CR, 0x20-0x7E),
  # optionally interleaved with escape-introduced runs
  # (ESC ( I, ESC ( J, ESC $ @, ESC $ B, ESC $ ( D), where each group of
  # runs is closed by ESC ( B before plain text resumes.
  def isjis(str)
    # Extended (x) syntax: whitespace inside the pattern is insignificant.
    iso2022jp = /\A [\t\n\r\x20-\x7E]*
                  (?:
                    (?:\x1b \x28 I      [\x21-\x7E]*
                      |\x1b \x28 J      [\x21-\x7E]*
                      |\x1b \x24 @      (?:[\x21-\x7E]{2})*
                      |\x1b \x24 B      (?:[\x21-\x7E]{2})*
                      |\x1b \x24 \x28 D (?:[\x21-\x7E]{2})*
                    )*
                    \x1b \x28 B [\t\n\r\x20-\x7E]*
                  )*
                 \z/nox

    # Match on a binary copy so the check is purely byte-wise.
    binary = str.dup.force_encoding('BINARY')
    !(iso2022jp =~ binary).nil?
  end

  # call-seq:
  #    Kconv.isutf8(str)   => true or false
  #
  # Returns whether input encoding is UTF-8 or not.
  def isutf8(str)
    # Work on a copy so the caller's string keeps its encoding.
    str.dup.force_encoding(UTF8).valid_encoding?
  end
end

class String
  # call-seq:
  #    String#kconv(to_enc, from_enc=nil)
  #
  # Convert <code>self</code> to <code>to_enc</code>.
  # <code>to_enc</code> and <code>from_enc</code> are given as constants of
  # Kconv or Encoding objects.
  #
  # When <code>from_enc</code> is omitted, the receiver's own encoding is
  # used as the source encoding, unless that encoding is ASCII-8BIT
  # (binary), in which case no source encoding is passed on.
  def kconv(to_enc, from_enc = nil)
    # Explicit constant instead of relying on the order of Encoding.list.
    from_enc ||= encoding unless encoding == Encoding::ASCII_8BIT
    Kconv.kconv(self, to_enc, from_enc)
  end

  #
  # to Encoding
  #

  # call-seq:
  #    String#tojis   => string
  #
  # Convert <code>self</code> to ISO-2022-JP
  def tojis
    Kconv.tojis(self)
  end

  # call-seq:
  #    String#toeuc   => string
  #
  # Convert <code>self</code> to EUC-JP
  def toeuc
    Kconv.toeuc(self)
  end

  # call-seq:
  #    String#tosjis   => string
  #
  # Convert <code>self</code> to Shift_JIS
  def tosjis
    Kconv.tosjis(self)
  end

  # call-seq:
  #    String#toutf8   => string
  #
  # Convert <code>self</code> to UTF-8
  def toutf8
    Kconv.toutf8(self)
  end

  # call-seq:
  #    String#toutf16   => string
  #
  # Convert <code>self</code> to UTF-16
  def toutf16
    Kconv.toutf16(self)
  end

  # call-seq:
  #    String#toutf32   => string
  #
  # Convert <code>self</code> to UTF-32
  def toutf32
    Kconv.toutf32(self)
  end

  # call-seq:
  #    String#tolocale   => string
  #
  # Convert <code>self</code> to locale encoding
  def tolocale
    Kconv.tolocale(self)
  end

  #
  # is Encoding
  #

  # call-seq:
  #    String#iseuc   => true or false
  #
  # Returns whether <code>self</code>'s encoding is EUC-JP or not.
  def iseuc
    Kconv.iseuc(self)
  end

  # call-seq:
  #    String#issjis   => true or false
  #
  # Returns whether <code>self</code>'s encoding is Shift_JIS or not.
  def issjis
    Kconv.issjis(self)
  end

  # call-seq:
  #    String#isjis   => true or false
  #
  # Returns whether <code>self</code>'s encoding is ISO-2022-JP or not.
  def isjis
    Kconv.isjis(self)
  end

  # call-seq:
  #    String#isutf8   => true or false
  #
  # Returns whether <code>self</code>'s encoding is UTF-8 or not.
  def isutf8
    Kconv.isutf8(self)
  end
end