1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
# coding: US-ASCII
##
# This class is a wrapper around File IO and Encoding that helps RDoc load
# files and convert them to the correct encoding.
module RDoc::Encoding
##
# Reads the contents of +filename+ and handles any encoding directives in
# the file.
#
# The content will be converted to the +encoding+. If the file cannot be
# converted a warning will be printed and nil will be returned.
#
# If +force_transcode+ is true the document will be transcoded and any
# unknown character in the target encoding will be replaced with '?'
def self.read_file filename, encoding, force_transcode = false
content = open filename, "rb" do |f| f.read end
content.gsub!("\r\n", "\n") if RUBY_PLATFORM =~ /mswin|mingw/
utf8 = content.sub!(/\A\xef\xbb\xbf/, '')
RDoc::Encoding.set_encoding content
if Object.const_defined? :Encoding then
begin
encoding ||= Encoding.default_external
orig_encoding = content.encoding
if not orig_encoding.ascii_compatible? then
content.encode! encoding
elsif utf8 then
content.force_encoding Encoding::UTF_8
content.encode! encoding
else
# assume the content is in our output encoding
content.force_encoding encoding
end
unless content.valid_encoding? then
# revert and try to transcode
content.force_encoding orig_encoding
content.encode! encoding
end
unless content.valid_encoding? then
warn "unable to convert #{filename} to #{encoding}, skipping"
content = nil
end
rescue Encoding::InvalidByteSequenceError,
Encoding::UndefinedConversionError => e
if force_transcode then
content.force_encoding orig_encoding
content.encode!(encoding,
:invalid => :replace, :undef => :replace,
:replace => '?')
return content
else
warn "unable to convert #{e.message} for #{filename}, skipping"
return nil
end
end
end
content
rescue ArgumentError => e
raise unless e.message =~ /unknown encoding name - (.*)/
warn "unknown encoding name \"#{$1}\" for #{filename}, skipping"
nil
rescue Errno::EISDIR, Errno::ENOENT
nil
end
##
# Sets the encoding of +string+ based on the magic comment
def self.set_encoding string
string =~ /\A(?:#!.*\n)?(.*\n)/
first_line = $1
name = case first_line
when /^<\?xml[^?]*encoding=(["'])(.*?)\1/ then $2
when /\b(?:en)?coding[=:]\s*([^\s;]+)/i then $1
else return
end
string.sub! first_line, ''
return unless Object.const_defined? :Encoding
enc = Encoding.find name
string.force_encoding enc if enc
end
end
|