From fcbf63e62c627deae76c1b8cb8c0876c536ed811 Mon Sep 17 00:00:00 2001 From: Jari Vetoniemi Date: Mon, 16 Mar 2020 18:49:26 +0900 Subject: Fresh start --- jni/ruby/lib/rdoc/encoding.rb | 99 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 jni/ruby/lib/rdoc/encoding.rb (limited to 'jni/ruby/lib/rdoc/encoding.rb') diff --git a/jni/ruby/lib/rdoc/encoding.rb b/jni/ruby/lib/rdoc/encoding.rb new file mode 100644 index 0000000..b3515a4 --- /dev/null +++ b/jni/ruby/lib/rdoc/encoding.rb @@ -0,0 +1,99 @@ +# coding: US-ASCII + +## +# This class is a wrapper around File IO and Encoding that helps RDoc load +# files and convert them to the correct encoding. + +module RDoc::Encoding + + ## + # Reads the contents of +filename+ and handles any encoding directives in + # the file. + # + # The content will be converted to the +encoding+. If the file cannot be + # converted a warning will be printed and nil will be returned. + # + # If +force_transcode+ is true the document will be transcoded and any + # unknown character in the target encoding will be replaced with '?' + + def self.read_file filename, encoding, force_transcode = false + content = open filename, "rb" do |f| f.read end + content.gsub!("\r\n", "\n") if RUBY_PLATFORM =~ /mswin|mingw/ + + utf8 = content.sub!(/\A\xef\xbb\xbf/, '') + + RDoc::Encoding.set_encoding content + + if Object.const_defined? :Encoding then + begin + encoding ||= Encoding.default_external + orig_encoding = content.encoding + + if not orig_encoding.ascii_compatible? then + content.encode! encoding + elsif utf8 then + content.force_encoding Encoding::UTF_8 + content.encode! encoding + else + # assume the content is in our output encoding + content.force_encoding encoding + end + + unless content.valid_encoding? then + # revert and try to transcode + content.force_encoding orig_encoding + content.encode! encoding + end + + unless content.valid_encoding? then + warn "unable to convert #{filename} to #{encoding}, skipping" + content = nil + end + rescue Encoding::InvalidByteSequenceError, + Encoding::UndefinedConversionError => e + if force_transcode then + content.force_encoding orig_encoding + content.encode!(encoding, + :invalid => :replace, :undef => :replace, + :replace => '?') + return content + else + warn "unable to convert #{e.message} for #{filename}, skipping" + return nil + end + end + end + + content + rescue ArgumentError => e + raise unless e.message =~ /unknown encoding name - (.*)/ + warn "unknown encoding name \"#{$1}\" for #{filename}, skipping" + nil + rescue Errno::EISDIR, Errno::ENOENT + nil + end + + ## + # Sets the encoding of +string+ based on the magic comment + + def self.set_encoding string + string =~ /\A(?:#!.*\n)?(.*\n)/ + + first_line = $1 + + name = case first_line + when /^<\?xml[^?]*encoding=(["'])(.*?)\1/ then $2 + when /\b(?:en)?coding[=:]\s*([^\s;]+)/i then $1 + else return + end + + string.sub! first_line, '' + + return unless Object.const_defined? :Encoding + + enc = Encoding.find name + string.force_encoding enc if enc + end + +end + -- cgit v1.2.3