diff options
author | Jari Vetoniemi <jari.vetoniemi@indooratlas.com> | 2020-03-16 18:49:26 +0900 |
---|---|---|
committer | Jari Vetoniemi <jari.vetoniemi@indooratlas.com> | 2020-03-30 00:39:06 +0900 |
commit | fcbf63e62c627deae76c1b8cb8c0876c536ed811 (patch) | |
tree | 64cb17de3f41a2b6fef2368028fbd00349946994 /jni/ruby/lib/rdoc/text.rb |
Fresh start
Diffstat (limited to 'jni/ruby/lib/rdoc/text.rb')
-rw-r--r-- | jni/ruby/lib/rdoc/text.rb | 324 |
1 files changed, 324 insertions, 0 deletions
diff --git a/jni/ruby/lib/rdoc/text.rb b/jni/ruby/lib/rdoc/text.rb new file mode 100644 index 0000000..606f15e --- /dev/null +++ b/jni/ruby/lib/rdoc/text.rb @@ -0,0 +1,324 @@ +# coding: utf-8 + +## +# For RDoc::Text#to_html + +require 'strscan' + +## +# For RDoc::Text#snippet + +begin + gem 'json' +rescue Gem::LoadError +end + +require 'json' + +## +# Methods for manipulating comment text + +module RDoc::Text + + ## + # Maps markup formats to classes that can parse them. If the format is + # unknown, "rdoc" format is used. + + MARKUP_FORMAT = { + 'markdown' => RDoc::Markdown, + 'rdoc' => RDoc::Markup, + 'rd' => RDoc::RD, + 'tomdoc' => RDoc::TomDoc, + } + + MARKUP_FORMAT.default = RDoc::Markup + + ## + # Maps an encoding to a Hash of characters properly transcoded for that + # encoding. + # + # See also encode_fallback. + + TO_HTML_CHARACTERS = Hash.new do |h, encoding| + h[encoding] = { + :close_dquote => encode_fallback('”', encoding, '"'), + :close_squote => encode_fallback('’', encoding, '\''), + :copyright => encode_fallback('©', encoding, '(c)'), + :ellipsis => encode_fallback('…', encoding, '...'), + :em_dash => encode_fallback('—', encoding, '---'), + :en_dash => encode_fallback('–', encoding, '--'), + :open_dquote => encode_fallback('“', encoding, '"'), + :open_squote => encode_fallback('‘', encoding, '\''), + :trademark => encode_fallback('®', encoding, '(r)'), + } + end if Object.const_defined? :Encoding + + ## + # Transcodes +character+ to +encoding+ with a +fallback+ character. + + def self.encode_fallback character, encoding, fallback + character.encode(encoding, :fallback => { character => fallback }, + :undef => :replace, :replace => fallback) + end + + ## + # Expands tab characters in +text+ to eight spaces + + def expand_tabs text + expanded = [] + + text.each_line do |line| + nil while line.gsub!(/(?:\G|\r)((?:.{8})*?)([^\t\r\n]{0,7})\t/) do + r = "#{$1}#{$2}#{' ' * (8 - $2.size)}" + r.force_encoding text.encoding if Object.const_defined? :Encoding + r + end + + expanded << line + end + + expanded.join + end + + ## + # Flush +text+ left based on the shortest line + + def flush_left text + indent = 9999 + + text.each_line do |line| + line_indent = line =~ /\S/ || 9999 + indent = line_indent if indent > line_indent + end + + empty = '' + empty.force_encoding text.encoding if Object.const_defined? :Encoding + + text.gsub(/^ {0,#{indent}}/, empty) + end + + ## + # Convert a string in markup format into HTML. + # + # Requires the including class to implement #formatter + + def markup text + if @store.rdoc.options + locale = @store.rdoc.options.locale + else + locale = nil + end + if locale + i18n_text = RDoc::I18n::Text.new(text) + text = i18n_text.translate(locale) + end + parse(text).accept formatter + end + + ## + # Strips hashes, expands tabs then flushes +text+ to the left + + def normalize_comment text + return text if text.empty? + + text = strip_stars text + text = strip_hashes text + text = expand_tabs text + text = flush_left text + text = strip_newlines text + text + end + + ## + # Normalizes +text+ then builds a RDoc::Markup::Document from it + + def parse text, format = 'rdoc' + return text if RDoc::Markup::Document === text + return text.parse if RDoc::Comment === text + + text = normalize_comment text # TODO remove, should not be necessary + + return RDoc::Markup::Document.new if text =~ /\A\n*\z/ + + MARKUP_FORMAT[format].parse text + end + + ## + # The first +limit+ characters of +text+ as HTML + + def snippet text, limit = 100 + document = parse text + + RDoc::Markup::ToHtmlSnippet.new(options, limit).convert document + end + + ## + # Strips leading # characters from +text+ + + def strip_hashes text + return text if text =~ /^(?>\s*)[^\#]/ + + empty = '' + empty.force_encoding text.encoding if Object.const_defined? :Encoding + + text.gsub(/^\s*(#+)/) { $1.tr '#', ' ' }.gsub(/^\s+$/, empty) + end + + ## + # Strips leading and trailing \n characters from +text+ + + def strip_newlines text + text.gsub(/\A\n*(.*?)\n*\z/m) do $1 end # block preserves String encoding + end + + ## + # Strips /* */ style comments + + def strip_stars text + return text unless text =~ %r%/\*.*\*/%m + + encoding = text.encoding if Object.const_defined? :Encoding + + text = text.gsub %r%Document-method:\s+[\w:.#=!?]+%, '' + + space = ' ' + space.force_encoding encoding if encoding + + text.sub! %r%/\*+% do space * $&.length end + text.sub! %r%\*+/% do space * $&.length end + text.gsub! %r%^[ \t]*\*%m do space * $&.length end + + empty = '' + empty.force_encoding encoding if encoding + text.gsub(/^\s+$/, empty) + end + + ## + # Converts ampersand, dashes, ellipsis, quotes, copyright and registered + # trademark symbols in +text+ to properly encoded characters. + + def to_html text + if Object.const_defined? :Encoding then + html = ''.encode text.encoding + + encoded = RDoc::Text::TO_HTML_CHARACTERS[text.encoding] + else + html = '' + encoded = { + :close_dquote => '”', + :close_squote => '’', + :copyright => '©', + :ellipsis => '…', + :em_dash => '—', + :en_dash => '–', + :open_dquote => '“', + :open_squote => '‘', + :trademark => '®', + } + end + + s = StringScanner.new text + insquotes = false + indquotes = false + after_word = nil + + until s.eos? do + case + when s.scan(/<(tt|code)>.*?<\/\1>/) then # skip contents of tt + html << s.matched.gsub('\\\\', '\\') + when s.scan(/<(tt|code)>.*?/) then + warn "mismatched <#{s[1]}> tag" # TODO signal file/line + html << s.matched + when s.scan(/<[^>]+\/?s*>/) then # skip HTML tags + html << s.matched + when s.scan(/\\(\S)/) then # unhandled suppressed crossref + html << s[1] + after_word = nil + when s.scan(/\.\.\.(\.?)/) then + html << s[1] << encoded[:ellipsis] + after_word = nil + when s.scan(/\(c\)/) then + html << encoded[:copyright] + after_word = nil + when s.scan(/\(r\)/) then + html << encoded[:trademark] + after_word = nil + when s.scan(/---/) then + html << encoded[:em_dash] + after_word = nil + when s.scan(/--/) then + html << encoded[:en_dash] + after_word = nil + when s.scan(/"|"/) then + html << encoded[indquotes ? :close_dquote : :open_dquote] + indquotes = !indquotes + after_word = nil + when s.scan(/``/) then # backtick double quote + html << encoded[:open_dquote] + after_word = nil + when s.scan(/''/) then # tick double quote + html << encoded[:close_dquote] + after_word = nil + when s.scan(/'/) then # single quote + if insquotes + html << encoded[:close_squote] + insquotes = false + elsif after_word + # Mary's dog, my parents' house: do not start paired quotes + html << encoded[:close_squote] + else + html << encoded[:open_squote] + insquotes = true + end + + after_word = nil + else # advance to the next potentially significant character + match = s.scan(/.+?(?=[<\\.("'`&-])/) #" + + if match then + html << match + after_word = match =~ /\w$/ + else + html << s.rest + break + end + end + end + + html + end + + ## + # Wraps +txt+ to +line_len+ + + def wrap(txt, line_len = 76) + res = [] + sp = 0 + ep = txt.length + + while sp < ep + # scan back for a space + p = sp + line_len - 1 + if p >= ep + p = ep + else + while p > sp and txt[p] != ?\s + p -= 1 + end + if p <= sp + p = sp + line_len + while p < ep and txt[p] != ?\s + p += 1 + end + end + end + res << txt[sp...p] << "\n" + sp = p + sp += 1 while sp < ep and txt[sp] == ?\s + end + + res.join.strip + end + +end + |