summaryrefslogtreecommitdiff
path: root/jni/ruby/lib/rdoc/ruby_lex.rb
diff options
context:
space:
mode:
authorJari Vetoniemi <jari.vetoniemi@indooratlas.com>2020-03-16 18:49:26 +0900
committerJari Vetoniemi <jari.vetoniemi@indooratlas.com>2020-03-30 00:39:06 +0900
commitfcbf63e62c627deae76c1b8cb8c0876c536ed811 (patch)
tree64cb17de3f41a2b6fef2368028fbd00349946994 /jni/ruby/lib/rdoc/ruby_lex.rb
Fresh start
Diffstat (limited to 'jni/ruby/lib/rdoc/ruby_lex.rb')
-rw-r--r--jni/ruby/lib/rdoc/ruby_lex.rb1377
1 files changed, 1377 insertions, 0 deletions
diff --git a/jni/ruby/lib/rdoc/ruby_lex.rb b/jni/ruby/lib/rdoc/ruby_lex.rb
new file mode 100644
index 0000000..91b90ab
--- /dev/null
+++ b/jni/ruby/lib/rdoc/ruby_lex.rb
@@ -0,0 +1,1377 @@
+# coding: US-ASCII
+
+#--
+# irb/ruby-lex.rb - ruby lexcal analyzer
+# $Release Version: 0.9.5$
+# $Revision: 17979 $
+# $Date: 2008-07-09 10:17:05 -0700 (Wed, 09 Jul 2008) $
+# by Keiju ISHITSUKA(keiju@ruby-lang.org)
+#
+#++
+
+require "e2mmap"
+require "irb/slex"
+require "stringio"
+
+##
+# Ruby lexer adapted from irb.
+#
+# The internals are not documented because they are scary.
+
+class RDoc::RubyLex
+
+ ##
+ # Raised upon invalid input
+
+ class Error < RDoc::Error
+ end
+
+ # :stopdoc:
+
+ extend Exception2MessageMapper
+
+ def_exception(:AlreadyDefinedToken, "Already defined token(%s)")
+ def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')")
+ def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')")
+ def_exception(:TkReading2TokenDuplicateError,
+ "key duplicate(token_n='%s', key='%s')")
+ def_exception(:SyntaxError, "%s")
+
+ def_exception(:TerminateLineInput, "Terminate Line Input")
+
+ include RDoc::RubyToken
+ include IRB
+
+ attr_accessor :continue
+ attr_accessor :lex_state
+ attr_reader :reader
+
+ class << self
+ attr_accessor :debug_level
+ end
+
+ def self.debug?
+ @debug_level > 0
+ end
+
+ self.debug_level = 0
+
+ # :startdoc:
+
+ ##
+ # Returns an Array of +ruby+ tokens. See ::new for a description of
+ # +options+.
+
+ def self.tokenize ruby, options
+ tokens = []
+
+ scanner = RDoc::RubyLex.new ruby, options
+ scanner.exception_on_syntax_error = true
+
+ while token = scanner.token do
+ tokens << token
+ end
+
+ tokens
+ end
+
+ ##
+ # Creates a new lexer for +content+. +options+ is an RDoc::Options, only
+ # +tab_width is used.
+
+ def initialize(content, options)
+ lex_init
+
+ if /\t/ =~ content then
+ tab_width = options.tab_width
+ content = content.split(/\n/).map do |line|
+ 1 while line.gsub!(/\t+/) {
+ ' ' * (tab_width*$&.length - $`.length % tab_width)
+ } && $~
+ line
+ end.join("\n")
+ end
+
+ content << "\n" unless content[-1, 1] == "\n"
+
+ set_input StringIO.new content
+
+ @base_char_no = 0
+ @char_no = 0
+ @exp_line_no = @line_no = 1
+ @here_readed = []
+ @readed = []
+ @rests = []
+ @seek = 0
+
+ @here_header = false
+ @indent = 0
+ @indent_stack = []
+ @lex_state = :EXPR_BEG
+ @space_seen = false
+
+ @continue = false
+ @line = ""
+
+ @skip_space = false
+ @readed_auto_clean_up = false
+ @exception_on_syntax_error = true
+
+ @prompt = nil
+ @prev_seek = nil
+ @ltype = nil
+ end
+
+ # :stopdoc:
+
+ def inspect # :nodoc:
+ "#<%s:0x%x pos %d lex_state %p space_seen %p>" % [
+ self.class, object_id,
+ @io.pos, @lex_state, @space_seen,
+ ]
+ end
+
+ attr_accessor :skip_space
+ attr_accessor :readed_auto_clean_up
+ attr_accessor :exception_on_syntax_error
+
+ attr_reader :seek
+ attr_reader :char_no
+ attr_reader :line_no
+ attr_reader :indent
+
+ # io functions
+ def set_input(io, p = nil, &block)
+ @io = io
+ if p.respond_to?(:call)
+ @input = p
+ elsif block_given?
+ @input = block
+ else
+ @input = Proc.new{@io.gets}
+ end
+ end
+
+ def get_readed
+ if idx = @readed.rindex("\n")
+ @base_char_no = @readed.size - (idx + 1)
+ else
+ @base_char_no += @readed.size
+ end
+
+ readed = @readed.join("")
+ @readed = []
+ readed
+ end
+
+ def getc
+ while @rests.empty?
+ # return nil unless buf_input
+ @rests.push nil unless buf_input
+ end
+ c = @rests.shift
+ if @here_header
+ @here_readed.push c
+ else
+ @readed.push c
+ end
+ @seek += 1
+ if c == "\n"
+ @line_no += 1
+ @char_no = 0
+ else
+ @char_no += 1
+ end
+
+ c
+ end
+
+ def gets
+ l = ""
+ while c = getc
+ l.concat(c)
+ break if c == "\n"
+ end
+ return nil if l == "" and c.nil?
+ l
+ end
+
+ def eof?
+ @io.eof?
+ end
+
+ def getc_of_rests
+ if @rests.empty?
+ nil
+ else
+ getc
+ end
+ end
+
+ def ungetc(c = nil)
+ if @here_readed.empty?
+ c2 = @readed.pop
+ else
+ c2 = @here_readed.pop
+ end
+ c = c2 unless c
+ @rests.unshift c #c =
+ @seek -= 1
+ if c == "\n"
+ @line_no -= 1
+ if idx = @readed.rindex("\n")
+ @char_no = idx + 1
+ else
+ @char_no = @base_char_no + @readed.size
+ end
+ else
+ @char_no -= 1
+ end
+ end
+
+ def peek_equal?(str)
+ chrs = str.split(//)
+ until @rests.size >= chrs.size
+ return false unless buf_input
+ end
+ @rests[0, chrs.size] == chrs
+ end
+
+ def peek_match?(regexp)
+ while @rests.empty?
+ return false unless buf_input
+ end
+ regexp =~ @rests.join("")
+ end
+
+ def peek(i = 0)
+ while @rests.size <= i
+ return nil unless buf_input
+ end
+ @rests[i]
+ end
+
+ def buf_input
+ prompt
+ line = @input.call
+ return nil unless line
+ @rests.concat line.split(//)
+ true
+ end
+ private :buf_input
+
+ def set_prompt(p = nil, &block)
+ p = block if block_given?
+ if p.respond_to?(:call)
+ @prompt = p
+ else
+ @prompt = Proc.new{print p}
+ end
+ end
+
+ def prompt
+ if @prompt
+ @prompt.call(@ltype, @indent, @continue, @line_no)
+ end
+ end
+
+ def initialize_input
+ @ltype = nil
+ @quoted = nil
+ @indent = 0
+ @indent_stack = []
+ @lex_state = :EXPR_BEG
+ @space_seen = false
+ @here_header = false
+
+ @continue = false
+ prompt
+
+ @line = ""
+ @exp_line_no = @line_no
+ end
+
+ def each_top_level_statement
+ initialize_input
+ catch(:TERM_INPUT) do
+ loop do
+ begin
+ @continue = false
+ prompt
+ unless l = lex
+ throw :TERM_INPUT if @line == ''
+ else
+ #p l
+ @line.concat l
+ if @ltype or @continue or @indent > 0
+ next
+ end
+ end
+ if @line != "\n"
+ yield @line, @exp_line_no
+ end
+ break unless l
+ @line = ''
+ @exp_line_no = @line_no
+
+ @indent = 0
+ @indent_stack = []
+ prompt
+ rescue TerminateLineInput
+ initialize_input
+ prompt
+ get_readed
+ end
+ end
+ end
+ end
+
+ def lex
+ until (((tk = token).kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)) &&
+ !@continue or
+ tk.nil?)
+ #p tk
+ #p @lex_state
+ #p self
+ end
+ line = get_readed
+ # print self.inspect
+ if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
+ nil
+ else
+ line
+ end
+ end
+
+ def token
+ # require "tracer"
+ # Tracer.on
+ @prev_seek = @seek
+ @prev_line_no = @line_no
+ @prev_char_no = @char_no
+ begin
+ begin
+ tk = @OP.match(self)
+ @space_seen = tk.kind_of?(TkSPACE)
+ rescue SyntaxError => e
+ raise Error, "syntax error: #{e.message}" if
+ @exception_on_syntax_error
+
+ tk = TkError.new(@seek, @line_no, @char_no)
+ end
+ end while @skip_space and tk.kind_of?(TkSPACE)
+
+ if @readed_auto_clean_up
+ get_readed
+ end
+ # Tracer.off
+ tk
+ end
+
+ ENINDENT_CLAUSE = [
+ "case", "class", "def", "do", "for", "if",
+ "module", "unless", "until", "while", "begin" #, "when"
+ ]
+
+ DEINDENT_CLAUSE = ["end" #, "when"
+ ]
+
+ PERCENT_LTYPE = {
+ "q" => "\'",
+ "Q" => "\"",
+ "x" => "\`",
+ "r" => "/",
+ "w" => "]",
+ "W" => "]",
+ "s" => ":"
+ }
+
+ PERCENT_PAREN = {
+ "{" => "}",
+ "[" => "]",
+ "<" => ">",
+ "(" => ")"
+ }
+
+ PERCENT_PAREN_REV = PERCENT_PAREN.invert
+
+ Ltype2Token = {
+ "\'" => TkSTRING,
+ "\"" => TkSTRING,
+ "\`" => TkXSTRING,
+ "/" => TkREGEXP,
+ "]" => TkDSTRING,
+ ":" => TkSYMBOL
+ }
+ DLtype2Token = {
+ "\"" => TkDSTRING,
+ "\`" => TkDXSTRING,
+ "/" => TkDREGEXP,
+ }
+
+ def lex_init()
+ @OP = IRB::SLex.new
+ @OP.def_rules("\0", "\004", "\032") do |op, io|
+ Token(TkEND_OF_SCRIPT, '')
+ end
+
+ @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io|
+ @space_seen = true
+ str = op
+ while (ch = getc) =~ /[ \t\f\r\13]/ do
+ str << ch
+ end
+ ungetc
+ Token TkSPACE, str
+ end
+
+ @OP.def_rule("#") do |op, io|
+ identify_comment
+ end
+
+ @OP.def_rule("=begin",
+ proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do
+ |op, io|
+ @ltype = "="
+ res = ''
+ nil until getc == "\n"
+
+ until ( peek_equal?("=end") && peek(4) =~ /\s/ ) do
+ (ch = getc)
+ res << ch
+ end
+
+ gets # consume =end
+
+ @ltype = nil
+ Token(TkRD_COMMENT, res)
+ end
+
+ @OP.def_rule("\n") do |op, io|
+ print "\\n\n" if RDoc::RubyLex.debug?
+ case @lex_state
+ when :EXPR_BEG, :EXPR_FNAME, :EXPR_DOT
+ @continue = true
+ else
+ @continue = false
+ @lex_state = :EXPR_BEG
+ until (@indent_stack.empty? ||
+ [TkLPAREN, TkLBRACK, TkLBRACE,
+ TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
+ @indent_stack.pop
+ end
+ end
+ @here_header = false
+ @here_readed = []
+ Token(TkNL)
+ end
+
+ @OP.def_rules("*", "**",
+ "=", "==", "===",
+ "=~", "<=>",
+ "<", "<=",
+ ">", ">=", ">>") do
+ |op, io|
+ case @lex_state
+ when :EXPR_FNAME, :EXPR_DOT
+ @lex_state = :EXPR_ARG
+ else
+ @lex_state = :EXPR_BEG
+ end
+ Token(op)
+ end
+
+ @OP.def_rules("!", "!=", "!~") do
+ |op, io|
+ @lex_state = :EXPR_BEG
+ Token(op)
+ end
+
+ @OP.def_rules("<<") do
+ |op, io|
+ tk = nil
+ if @lex_state != :EXPR_END && @lex_state != :EXPR_CLASS &&
+ (@lex_state != :EXPR_ARG || @space_seen)
+ c = peek(0)
+ if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-")
+ tk = identify_here_document
+ end
+ end
+ unless tk
+ tk = Token(op)
+ case @lex_state
+ when :EXPR_FNAME, :EXPR_DOT
+ @lex_state = :EXPR_ARG
+ else
+ @lex_state = :EXPR_BEG
+ end
+ end
+ tk
+ end
+
+ @OP.def_rules("'", '"') do
+ |op, io|
+ identify_string(op)
+ end
+
+ @OP.def_rules("`") do
+ |op, io|
+ if @lex_state == :EXPR_FNAME
+ @lex_state = :EXPR_END
+ Token(op)
+ else
+ identify_string(op)
+ end
+ end
+
+ @OP.def_rules('?') do
+ |op, io|
+ if @lex_state == :EXPR_END
+ @lex_state = :EXPR_BEG
+ Token(TkQUESTION)
+ else
+ ch = getc
+ if @lex_state == :EXPR_ARG && ch =~ /\s/
+ ungetc
+ @lex_state = :EXPR_BEG;
+ Token(TkQUESTION)
+ else
+ @lex_state = :EXPR_END
+ Token(TkCHAR, "?#{ch}")
+ end
+ end
+ end
+
+ @OP.def_rules("&", "&&", "|", "||") do
+ |op, io|
+ @lex_state = :EXPR_BEG
+ Token(op)
+ end
+
+ @OP.def_rules("+=", "-=", "*=", "**=",
+ "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
+ |op, io|
+ @lex_state = :EXPR_BEG
+ op =~ /^(.*)=$/
+ Token(TkOPASGN, $1)
+ end
+
+ @OP.def_rule("+@", proc{|op, io| @lex_state == :EXPR_FNAME}) do
+ |op, io|
+ @lex_state = :EXPR_ARG
+ Token(op)
+ end
+
+ @OP.def_rule("-@", proc{|op, io| @lex_state == :EXPR_FNAME}) do
+ |op, io|
+ @lex_state = :EXPR_ARG
+ Token(op)
+ end
+
+ @OP.def_rules("+", "-") do
+ |op, io|
+ catch(:RET) do
+ if @lex_state == :EXPR_ARG
+ if @space_seen and peek(0) =~ /[0-9]/
+ throw :RET, identify_number(op)
+ else
+ @lex_state = :EXPR_BEG
+ end
+ elsif @lex_state != :EXPR_END and peek(0) =~ /[0-9]/
+ throw :RET, identify_number(op)
+ else
+ @lex_state = :EXPR_BEG
+ end
+ Token(op)
+ end
+ end
+
+ @OP.def_rule(".") do
+ |op, io|
+ @lex_state = :EXPR_BEG
+ if peek(0) =~ /[0-9]/
+ ungetc
+ identify_number
+ else
+ # for "obj.if" etc.
+ @lex_state = :EXPR_DOT
+ Token(TkDOT)
+ end
+ end
+
+ @OP.def_rules("..", "...") do
+ |op, io|
+ @lex_state = :EXPR_BEG
+ Token(op)
+ end
+
+ lex_int2
+ end
+
+ def lex_int2
+ @OP.def_rules("]", "}", ")") do
+ |op, io|
+ @lex_state = :EXPR_END
+ @indent -= 1
+ @indent_stack.pop
+ Token(op)
+ end
+
+ @OP.def_rule(":") do
+ |op, io|
+ if @lex_state == :EXPR_END || peek(0) =~ /\s/
+ @lex_state = :EXPR_BEG
+ Token(TkCOLON)
+ else
+ @lex_state = :EXPR_FNAME;
+ Token(TkSYMBEG)
+ end
+ end
+
+ @OP.def_rule("::") do
+ |op, io|
+ # p @lex_state.id2name, @space_seen
+ if @lex_state == :EXPR_BEG or @lex_state == :EXPR_ARG && @space_seen
+ @lex_state = :EXPR_BEG
+ Token(TkCOLON3)
+ else
+ @lex_state = :EXPR_DOT
+ Token(TkCOLON2)
+ end
+ end
+
+ @OP.def_rule("/") do
+ |op, io|
+ if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID
+ identify_string(op)
+ elsif peek(0) == '='
+ getc
+ @lex_state = :EXPR_BEG
+ Token(TkOPASGN, "/") #/)
+ elsif @lex_state == :EXPR_ARG and @space_seen and peek(0) !~ /\s/
+ identify_string(op)
+ else
+ @lex_state = :EXPR_BEG
+ Token("/") #/)
+ end
+ end
+
+ @OP.def_rules("^") do
+ |op, io|
+ @lex_state = :EXPR_BEG
+ Token("^")
+ end
+
+ # @OP.def_rules("^=") do
+ # @lex_state = :EXPR_BEG
+ # Token(OP_ASGN, :^)
+ # end
+
+ @OP.def_rules(",") do
+ |op, io|
+ @lex_state = :EXPR_BEG
+ Token(op)
+ end
+
+ @OP.def_rules(";") do
+ |op, io|
+ @lex_state = :EXPR_BEG
+ until (@indent_stack.empty? ||
+ [TkLPAREN, TkLBRACK, TkLBRACE,
+ TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
+ @indent_stack.pop
+ end
+ Token(op)
+ end
+
+ @OP.def_rule("~") do
+ |op, io|
+ @lex_state = :EXPR_BEG
+ Token("~")
+ end
+
+ @OP.def_rule("~@", proc{|op, io| @lex_state == :EXPR_FNAME}) do
+ |op, io|
+ @lex_state = :EXPR_BEG
+ Token("~")
+ end
+
+ @OP.def_rule("(") do
+ |op, io|
+ @indent += 1
+ if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID
+ @lex_state = :EXPR_BEG
+ tk_c = TkfLPAREN
+ else
+ @lex_state = :EXPR_BEG
+ tk_c = TkLPAREN
+ end
+ @indent_stack.push tk_c
+ Token tk_c
+ end
+
+ @OP.def_rule("[]", proc{|op, io| @lex_state == :EXPR_FNAME}) do
+ |op, io|
+ @lex_state = :EXPR_ARG
+ Token("[]")
+ end
+
+ @OP.def_rule("[]=", proc{|op, io| @lex_state == :EXPR_FNAME}) do
+ |op, io|
+ @lex_state = :EXPR_ARG
+ Token("[]=")
+ end
+
+ @OP.def_rule("[") do
+ |op, io|
+ @indent += 1
+ if @lex_state == :EXPR_FNAME
+ tk_c = TkfLBRACK
+ else
+ if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID
+ tk_c = TkLBRACK
+ elsif @lex_state == :EXPR_ARG && @space_seen
+ tk_c = TkLBRACK
+ else
+ tk_c = TkfLBRACK
+ end
+ @lex_state = :EXPR_BEG
+ end
+ @indent_stack.push tk_c
+ Token(tk_c)
+ end
+
+ @OP.def_rule("{") do
+ |op, io|
+ @indent += 1
+ if @lex_state != :EXPR_END && @lex_state != :EXPR_ARG
+ tk_c = TkLBRACE
+ else
+ tk_c = TkfLBRACE
+ end
+ @lex_state = :EXPR_BEG
+ @indent_stack.push tk_c
+ Token(tk_c)
+ end
+
+ @OP.def_rule('\\') do
+ |op, io|
+ if getc == "\n"
+ @space_seen = true
+ @continue = true
+ Token(TkSPACE)
+ else
+ ungetc
+ Token("\\")
+ end
+ end
+
+ @OP.def_rule('%') do
+ |op, io|
+ if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID
+ identify_quotation
+ elsif peek(0) == '='
+ getc
+ Token(TkOPASGN, :%)
+ elsif @lex_state == :EXPR_ARG and @space_seen and peek(0) !~ /\s/
+ identify_quotation
+ else
+ @lex_state = :EXPR_BEG
+ Token("%") #))
+ end
+ end
+
+ @OP.def_rule('$') do
+ |op, io|
+ identify_gvar
+ end
+
+ @OP.def_rule('@') do
+ |op, io|
+ if peek(0) =~ /[\w@]/
+ ungetc
+ identify_identifier
+ else
+ Token("@")
+ end
+ end
+
+ # @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do
+ # |op, io|
+ # @indent += 1
+ # @lex_state = :EXPR_FNAME
+ # # @lex_state = :EXPR_END
+ # # until @rests[0] == "\n" or @rests[0] == ";"
+ # # rests.shift
+ # # end
+ # end
+
+ @OP.def_rule("_") do
+ if peek_match?(/_END__/) and @lex_state == :EXPR_BEG then
+ 6.times { getc }
+ Token(TkEND_OF_SCRIPT, '__END__')
+ else
+ ungetc
+ identify_identifier
+ end
+ end
+
+ @OP.def_rule("") do
+ |op, io|
+ printf "MATCH: start %s: %s\n", op, io.inspect if RDoc::RubyLex.debug?
+ if peek(0) =~ /[0-9]/
+ t = identify_number
+ else
+ t = identify_identifier
+ end
+ printf "MATCH: end %s: %s\n", op, io.inspect if RDoc::RubyLex.debug?
+ t
+ end
+
+ p @OP if RDoc::RubyLex.debug?
+ end
+
+ def identify_gvar
+ @lex_state = :EXPR_END
+
+ case ch = getc
+ when /[~_*$?!@\/\\;,=:<>".]/ #"
+ Token(TkGVAR, "$" + ch)
+ when "-"
+ Token(TkGVAR, "$-" + getc)
+ when "&", "`", "'", "+"
+ Token(TkBACK_REF, "$"+ch)
+ when /[1-9]/
+ ref = ch
+ while (ch = getc) =~ /[0-9]/ do ref << ch end
+ ungetc
+ Token(TkNTH_REF, "$#{ref}")
+ when /\w/
+ ungetc
+ ungetc
+ identify_identifier
+ else
+ ungetc
+ Token("$")
+ end
+ end
+
+ IDENT_RE = if defined? Encoding then
+ eval '/[\w\u{0080}-\u{FFFFF}]/u' # 1.8 can't parse \u{}
+ else
+ /[\w\x80-\xFF]/
+ end
+
+ def identify_identifier
+ token = ""
+ if peek(0) =~ /[$@]/
+ token.concat(c = getc)
+ if c == "@" and peek(0) == "@"
+ token.concat getc
+ end
+ end
+
+ while (ch = getc) =~ IDENT_RE do
+ print " :#{ch}: " if RDoc::RubyLex.debug?
+ token.concat ch
+ end
+
+ ungetc
+
+ if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "="
+ token.concat getc
+ end
+
+ # almost fix token
+
+ case token
+ when /^\$/
+ return Token(TkGVAR, token)
+ when /^\@\@/
+ @lex_state = :EXPR_END
+ # p Token(TkCVAR, token)
+ return Token(TkCVAR, token)
+ when /^\@/
+ @lex_state = :EXPR_END
+ return Token(TkIVAR, token)
+ end
+
+ if @lex_state != :EXPR_DOT
+ print token, "\n" if RDoc::RubyLex.debug?
+
+ token_c, *trans = TkReading2Token[token]
+ if token_c
+ # reserved word?
+
+ if (@lex_state != :EXPR_BEG &&
+ @lex_state != :EXPR_FNAME &&
+ trans[1])
+ # modifiers
+ token_c = TkSymbol2Token[trans[1]]
+ @lex_state = trans[0]
+ else
+ if @lex_state != :EXPR_FNAME
+ if ENINDENT_CLAUSE.include?(token)
+ valid = peek(0) != ':'
+
+ # check for ``class = val'' etc.
+ case token
+ when "class"
+ valid = false unless peek_match?(/^\s*(<<|\w|::)/)
+ when "def"
+ valid = false if peek_match?(/^\s*(([+-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/)
+ when "do"
+ valid = false if peek_match?(/^\s*([+-\/*]?=|\*|<|>|\&)/)
+ when *ENINDENT_CLAUSE
+ valid = false if peek_match?(/^\s*([+-\/*]?=|\*|<|>|\&|\|)/)
+ else
+ # no nothing
+ end if valid
+
+ if valid
+ if token == "do"
+ if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last)
+ @indent += 1
+ @indent_stack.push token_c
+ end
+ else
+ @indent += 1
+ @indent_stack.push token_c
+ end
+ else
+ token_c = TkIDENTIFIER
+ end
+
+ elsif DEINDENT_CLAUSE.include?(token)
+ @indent -= 1
+ @indent_stack.pop
+ end
+ @lex_state = trans[0]
+ else
+ @lex_state = :EXPR_END
+ end
+ end
+ return Token(token_c, token)
+ end
+ end
+
+ if @lex_state == :EXPR_FNAME
+ @lex_state = :EXPR_END
+ if peek(0) == '='
+ token.concat getc
+ end
+ elsif @lex_state == :EXPR_BEG || @lex_state == :EXPR_DOT ||
+ @lex_state == :EXPR_ARG
+ @lex_state = :EXPR_ARG
+ else
+ @lex_state = :EXPR_END
+ end
+
+ if token[0, 1] =~ /[A-Z]/
+ return Token(TkCONSTANT, token)
+ elsif token[token.size - 1, 1] =~ /[!?]/
+ return Token(TkFID, token)
+ else
+ return Token(TkIDENTIFIER, token)
+ end
+ end
+
+ def identify_here_document
+ ch = getc
+ # if lt = PERCENT_LTYPE[ch]
+ if ch == "-"
+ ch = getc
+ indent = true
+ end
+ if /['"`]/ =~ ch
+ user_quote = lt = ch
+ quoted = ""
+ while (c = getc) && c != lt
+ quoted.concat c
+ end
+ else
+ user_quote = nil
+ lt = '"'
+ quoted = ch.dup
+ while (c = getc) && c =~ /\w/
+ quoted.concat c
+ end
+ ungetc
+ end
+
+ ltback, @ltype = @ltype, lt
+ reserve = []
+ while ch = getc
+ reserve.push ch
+ if ch == "\\"
+ reserve.push ch = getc
+ elsif ch == "\n"
+ break
+ end
+ end
+
+ output_heredoc = reserve.join =~ /\A\r?\n\z/
+
+ if output_heredoc then
+ doc = '<<'
+ doc << '-' if indent
+ doc << "#{user_quote}#{quoted}#{user_quote}\n"
+ else
+ doc = '"'
+ end
+
+ @here_header = false
+ while l = gets
+ l = l.sub(/(:?\r)?\n\z/, "\n")
+ if (indent ? l.strip : l.chomp) == quoted
+ break
+ end
+ doc << l
+ end
+
+ if output_heredoc then
+ raise Error, "Missing terminating #{quoted} for string" unless l
+
+ doc << l.chomp
+ else
+ doc << '"'
+ end
+
+ @here_header = true
+ @here_readed.concat reserve
+ while ch = reserve.pop
+ ungetc ch
+ end
+
+ token_class = output_heredoc ? RDoc::RubyLex::TkHEREDOC : Ltype2Token[lt]
+ @ltype = ltback
+ @lex_state = :EXPR_END
+ Token(token_class, doc)
+ end
+
+ def identify_quotation
+ type = ch = getc
+ if lt = PERCENT_LTYPE[type]
+ ch = getc
+ elsif type =~ /\W/
+ type = nil
+ lt = "\""
+ else
+ return Token(TkMOD, '%')
+ end
+ # if ch !~ /\W/
+ # ungetc
+ # next
+ # end
+ #@ltype = lt
+ @quoted = ch unless @quoted = PERCENT_PAREN[ch]
+ identify_string(lt, @quoted, type)
+ end
+
+ def identify_number(op = "")
+ @lex_state = :EXPR_END
+
+ num = op
+
+ if peek(0) == "0" && peek(1) !~ /[.eE]/
+ num << getc
+
+ case peek(0)
+ when /[xX]/
+ ch = getc
+ match = /[0-9a-fA-F_]/
+ when /[bB]/
+ ch = getc
+ match = /[01_]/
+ when /[oO]/
+ ch = getc
+ match = /[0-7_]/
+ when /[dD]/
+ ch = getc
+ match = /[0-9_]/
+ when /[0-7]/
+ match = /[0-7_]/
+ when /[89]/
+ raise Error, "Illegal octal digit"
+ else
+ return Token(TkINTEGER, num)
+ end
+
+ num << ch if ch
+
+ len0 = true
+ non_digit = false
+ while ch = getc
+ num << ch
+ if match =~ ch
+ if ch == "_"
+ if non_digit
+ raise Error, "trailing `#{ch}' in number"
+ else
+ non_digit = ch
+ end
+ else
+ non_digit = false
+ len0 = false
+ end
+ else
+ ungetc
+ num[-1, 1] = ''
+ if len0
+ raise Error, "numeric literal without digits"
+ end
+ if non_digit
+ raise Error, "trailing `#{non_digit}' in number"
+ end
+ break
+ end
+ end
+ return Token(TkINTEGER, num)
+ end
+
+ type = TkINTEGER
+ allow_point = true
+ allow_e = true
+ non_digit = false
+ while ch = getc
+ num << ch
+ case ch
+ when /[0-9]/
+ non_digit = false
+ when "_"
+ non_digit = ch
+ when allow_point && "."
+ if non_digit
+ raise Error, "trailing `#{non_digit}' in number"
+ end
+ type = TkFLOAT
+ if peek(0) !~ /[0-9]/
+ type = TkINTEGER
+ ungetc
+ num[-1, 1] = ''
+ break
+ end
+ allow_point = false
+ when allow_e && "e", allow_e && "E"
+ if non_digit
+ raise Error, "trailing `#{non_digit}' in number"
+ end
+ type = TkFLOAT
+ if peek(0) =~ /[+-]/
+ num << getc
+ end
+ allow_e = false
+ allow_point = false
+ non_digit = ch
+ else
+ if non_digit
+ raise Error, "trailing `#{non_digit}' in number"
+ end
+ ungetc
+ num[-1, 1] = ''
+ break
+ end
+ end
+
+ Token(type, num)
+ end
+
+ def identify_string(ltype, quoted = ltype, type = nil)
+ close = PERCENT_PAREN.values.include?(quoted)
+ @ltype = ltype
+ @quoted = quoted
+
+ str = if ltype == quoted and %w[" ' /].include? ltype then
+ ltype.dup
+ elsif RUBY_VERSION > '1.9' then
+ "%#{type or PERCENT_LTYPE.key ltype}#{PERCENT_PAREN_REV[quoted]||quoted}"
+ else
+ "%#{type or PERCENT_LTYPE.index ltype}#{PERCENT_PAREN_REV[quoted]||quoted}"
+ end
+
+ subtype = nil
+ begin
+ nest = 0
+
+ while ch = getc
+ str << ch
+
+ if @quoted == ch and nest <= 0
+ break
+ elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#"
+ ch = getc
+ subtype = true
+ if ch == "{" then
+ str << ch << skip_inner_expression
+ next
+ else
+ ungetc
+ end
+ elsif ch == '\\'
+ if %w[' /].include? @ltype then
+ case ch = getc
+ when "\\", "\n", "'"
+ when @ltype
+ str << ch
+ else
+ ungetc
+ end
+ else
+ str << read_escape
+ end
+ end
+
+ if close then
+ if PERCENT_PAREN[ch] == @quoted
+ nest += 1
+ elsif ch == @quoted
+ nest -= 1
+ end
+ end
+ end
+
+ if @ltype == "/"
+ while peek(0) =~ /i|m|x|o|e|s|u|n/
+ str << getc
+ end
+ end
+
+ if subtype
+ Token(DLtype2Token[ltype], str)
+ else
+ Token(Ltype2Token[ltype], str)
+ end
+ ensure
+ @ltype = nil
+ @quoted = nil
+ @lex_state = :EXPR_END
+ end
+ end
+
+ def skip_inner_expression
+ res = ""
+ nest = 0
+ while ch = getc
+ res << ch
+ if ch == '}'
+ break if nest.zero?
+ nest -= 1
+ elsif ch == '{'
+ nest += 1
+ end
+ end
+ res
+ end
+
+ def identify_comment
+ @ltype = "#"
+
+ comment = '#'
+
+ while ch = getc
+ # if ch == "\\" #"
+ # read_escape
+ # end
+ if ch == "\n"
+ @ltype = nil
+ ungetc
+ break
+ end
+
+ comment << ch
+ end
+
+ return Token(TkCOMMENT, comment)
+ end
+
+ def read_escape
+ escape = ''
+ ch = getc
+
+ case ch
+ when "\n", "\r", "\f"
+ escape << ch
+ when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s" #"
+ escape << ch
+ when /[0-7]/
+ ungetc ch
+ 3.times do
+ ch = getc
+ case ch
+ when /[0-7]/
+ escape << ch
+ when nil
+ break
+ else
+ ungetc
+ break
+ end
+ end
+
+ when "x"
+ escape << ch
+
+ 2.times do
+ ch = getc
+ case ch
+ when /[0-9a-fA-F]/
+ escape << ch
+ when nil
+ break
+ else
+ ungetc
+ break
+ end
+ end
+
+ when "M"
+ escape << ch
+
+ ch = getc
+ if ch != '-'
+ ungetc
+ else
+ escape << ch
+
+ ch = getc
+ if ch == "\\" #"
+ ungetc
+ escape << read_escape
+ else
+ escape << ch
+ end
+ end
+
+ when "C", "c" #, "^"
+ escape << ch
+
+ if ch == "C"
+ ch = getc
+
+ if ch == "-"
+ escape << ch
+ ch = getc
+ escape << ch
+
+ escape << read_escape if ch == "\\"
+ else
+ ungetc
+ end
+ elsif (ch = getc) == "\\" #"
+ escape << ch << read_escape
+ end
+ else
+ escape << ch
+
+ # other characters
+ end
+
+ escape
+ end
+
+ # :startdoc:
+
+end
+
+#RDoc::RubyLex.debug_level = 1
+