diff options
author | Jari Vetoniemi <jari.vetoniemi@indooratlas.com> | 2020-03-16 18:49:26 +0900 |
---|---|---|
committer | Jari Vetoniemi <jari.vetoniemi@indooratlas.com> | 2020-03-30 00:39:06 +0900 |
commit | fcbf63e62c627deae76c1b8cb8c0876c536ed811 (patch) | |
tree | 64cb17de3f41a2b6fef2368028fbd00349946994 /jni/ruby/lib/rdoc/ruby_lex.rb |
Fresh start
Diffstat (limited to 'jni/ruby/lib/rdoc/ruby_lex.rb')
-rw-r--r-- | jni/ruby/lib/rdoc/ruby_lex.rb | 1377 |
1 files changed, 1377 insertions, 0 deletions
diff --git a/jni/ruby/lib/rdoc/ruby_lex.rb b/jni/ruby/lib/rdoc/ruby_lex.rb new file mode 100644 index 0000000..91b90ab --- /dev/null +++ b/jni/ruby/lib/rdoc/ruby_lex.rb @@ -0,0 +1,1377 @@ +# coding: US-ASCII + +#-- +# irb/ruby-lex.rb - ruby lexcal analyzer +# $Release Version: 0.9.5$ +# $Revision: 17979 $ +# $Date: 2008-07-09 10:17:05 -0700 (Wed, 09 Jul 2008) $ +# by Keiju ISHITSUKA(keiju@ruby-lang.org) +# +#++ + +require "e2mmap" +require "irb/slex" +require "stringio" + +## +# Ruby lexer adapted from irb. +# +# The internals are not documented because they are scary. + +class RDoc::RubyLex + + ## + # Raised upon invalid input + + class Error < RDoc::Error + end + + # :stopdoc: + + extend Exception2MessageMapper + + def_exception(:AlreadyDefinedToken, "Already defined token(%s)") + def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')") + def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')") + def_exception(:TkReading2TokenDuplicateError, + "key duplicate(token_n='%s', key='%s')") + def_exception(:SyntaxError, "%s") + + def_exception(:TerminateLineInput, "Terminate Line Input") + + include RDoc::RubyToken + include IRB + + attr_accessor :continue + attr_accessor :lex_state + attr_reader :reader + + class << self + attr_accessor :debug_level + end + + def self.debug? + @debug_level > 0 + end + + self.debug_level = 0 + + # :startdoc: + + ## + # Returns an Array of +ruby+ tokens. See ::new for a description of + # +options+. + + def self.tokenize ruby, options + tokens = [] + + scanner = RDoc::RubyLex.new ruby, options + scanner.exception_on_syntax_error = true + + while token = scanner.token do + tokens << token + end + + tokens + end + + ## + # Creates a new lexer for +content+. +options+ is an RDoc::Options, only + # +tab_width is used. + + def initialize(content, options) + lex_init + + if /\t/ =~ content then + tab_width = options.tab_width + content = content.split(/\n/).map do |line| + 1 while line.gsub!(/\t+/) { + ' ' * (tab_width*$&.length - $`.length % tab_width) + } && $~ + line + end.join("\n") + end + + content << "\n" unless content[-1, 1] == "\n" + + set_input StringIO.new content + + @base_char_no = 0 + @char_no = 0 + @exp_line_no = @line_no = 1 + @here_readed = [] + @readed = [] + @rests = [] + @seek = 0 + + @here_header = false + @indent = 0 + @indent_stack = [] + @lex_state = :EXPR_BEG + @space_seen = false + + @continue = false + @line = "" + + @skip_space = false + @readed_auto_clean_up = false + @exception_on_syntax_error = true + + @prompt = nil + @prev_seek = nil + @ltype = nil + end + + # :stopdoc: + + def inspect # :nodoc: + "#<%s:0x%x pos %d lex_state %p space_seen %p>" % [ + self.class, object_id, + @io.pos, @lex_state, @space_seen, + ] + end + + attr_accessor :skip_space + attr_accessor :readed_auto_clean_up + attr_accessor :exception_on_syntax_error + + attr_reader :seek + attr_reader :char_no + attr_reader :line_no + attr_reader :indent + + # io functions + def set_input(io, p = nil, &block) + @io = io + if p.respond_to?(:call) + @input = p + elsif block_given? + @input = block + else + @input = Proc.new{@io.gets} + end + end + + def get_readed + if idx = @readed.rindex("\n") + @base_char_no = @readed.size - (idx + 1) + else + @base_char_no += @readed.size + end + + readed = @readed.join("") + @readed = [] + readed + end + + def getc + while @rests.empty? + # return nil unless buf_input + @rests.push nil unless buf_input + end + c = @rests.shift + if @here_header + @here_readed.push c + else + @readed.push c + end + @seek += 1 + if c == "\n" + @line_no += 1 + @char_no = 0 + else + @char_no += 1 + end + + c + end + + def gets + l = "" + while c = getc + l.concat(c) + break if c == "\n" + end + return nil if l == "" and c.nil? + l + end + + def eof? + @io.eof? + end + + def getc_of_rests + if @rests.empty? + nil + else + getc + end + end + + def ungetc(c = nil) + if @here_readed.empty? + c2 = @readed.pop + else + c2 = @here_readed.pop + end + c = c2 unless c + @rests.unshift c #c = + @seek -= 1 + if c == "\n" + @line_no -= 1 + if idx = @readed.rindex("\n") + @char_no = idx + 1 + else + @char_no = @base_char_no + @readed.size + end + else + @char_no -= 1 + end + end + + def peek_equal?(str) + chrs = str.split(//) + until @rests.size >= chrs.size + return false unless buf_input + end + @rests[0, chrs.size] == chrs + end + + def peek_match?(regexp) + while @rests.empty? + return false unless buf_input + end + regexp =~ @rests.join("") + end + + def peek(i = 0) + while @rests.size <= i + return nil unless buf_input + end + @rests[i] + end + + def buf_input + prompt + line = @input.call + return nil unless line + @rests.concat line.split(//) + true + end + private :buf_input + + def set_prompt(p = nil, &block) + p = block if block_given? + if p.respond_to?(:call) + @prompt = p + else + @prompt = Proc.new{print p} + end + end + + def prompt + if @prompt + @prompt.call(@ltype, @indent, @continue, @line_no) + end + end + + def initialize_input + @ltype = nil + @quoted = nil + @indent = 0 + @indent_stack = [] + @lex_state = :EXPR_BEG + @space_seen = false + @here_header = false + + @continue = false + prompt + + @line = "" + @exp_line_no = @line_no + end + + def each_top_level_statement + initialize_input + catch(:TERM_INPUT) do + loop do + begin + @continue = false + prompt + unless l = lex + throw :TERM_INPUT if @line == '' + else + #p l + @line.concat l + if @ltype or @continue or @indent > 0 + next + end + end + if @line != "\n" + yield @line, @exp_line_no + end + break unless l + @line = '' + @exp_line_no = @line_no + + @indent = 0 + @indent_stack = [] + prompt + rescue TerminateLineInput + initialize_input + prompt + get_readed + end + end + end + end + + def lex + until (((tk = token).kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)) && + !@continue or + tk.nil?) + #p tk + #p @lex_state + #p self + end + line = get_readed + # print self.inspect + if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil? + nil + else + line + end + end + + def token + # require "tracer" + # Tracer.on + @prev_seek = @seek + @prev_line_no = @line_no + @prev_char_no = @char_no + begin + begin + tk = @OP.match(self) + @space_seen = tk.kind_of?(TkSPACE) + rescue SyntaxError => e + raise Error, "syntax error: #{e.message}" if + @exception_on_syntax_error + + tk = TkError.new(@seek, @line_no, @char_no) + end + end while @skip_space and tk.kind_of?(TkSPACE) + + if @readed_auto_clean_up + get_readed + end + # Tracer.off + tk + end + + ENINDENT_CLAUSE = [ + "case", "class", "def", "do", "for", "if", + "module", "unless", "until", "while", "begin" #, "when" + ] + + DEINDENT_CLAUSE = ["end" #, "when" + ] + + PERCENT_LTYPE = { + "q" => "\'", + "Q" => "\"", + "x" => "\`", + "r" => "/", + "w" => "]", + "W" => "]", + "s" => ":" + } + + PERCENT_PAREN = { + "{" => "}", + "[" => "]", + "<" => ">", + "(" => ")" + } + + PERCENT_PAREN_REV = PERCENT_PAREN.invert + + Ltype2Token = { + "\'" => TkSTRING, + "\"" => TkSTRING, + "\`" => TkXSTRING, + "/" => TkREGEXP, + "]" => TkDSTRING, + ":" => TkSYMBOL + } + DLtype2Token = { + "\"" => TkDSTRING, + "\`" => TkDXSTRING, + "/" => TkDREGEXP, + } + + def lex_init() + @OP = IRB::SLex.new + @OP.def_rules("\0", "\004", "\032") do |op, io| + Token(TkEND_OF_SCRIPT, '') + end + + @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io| + @space_seen = true + str = op + while (ch = getc) =~ /[ \t\f\r\13]/ do + str << ch + end + ungetc + Token TkSPACE, str + end + + @OP.def_rule("#") do |op, io| + identify_comment + end + + @OP.def_rule("=begin", + proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do + |op, io| + @ltype = "=" + res = '' + nil until getc == "\n" + + until ( peek_equal?("=end") && peek(4) =~ /\s/ ) do + (ch = getc) + res << ch + end + + gets # consume =end + + @ltype = nil + Token(TkRD_COMMENT, res) + end + + @OP.def_rule("\n") do |op, io| + print "\\n\n" if RDoc::RubyLex.debug? + case @lex_state + when :EXPR_BEG, :EXPR_FNAME, :EXPR_DOT + @continue = true + else + @continue = false + @lex_state = :EXPR_BEG + until (@indent_stack.empty? || + [TkLPAREN, TkLBRACK, TkLBRACE, + TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last)) + @indent_stack.pop + end + end + @here_header = false + @here_readed = [] + Token(TkNL) + end + + @OP.def_rules("*", "**", + "=", "==", "===", + "=~", "<=>", + "<", "<=", + ">", ">=", ">>") do + |op, io| + case @lex_state + when :EXPR_FNAME, :EXPR_DOT + @lex_state = :EXPR_ARG + else + @lex_state = :EXPR_BEG + end + Token(op) + end + + @OP.def_rules("!", "!=", "!~") do + |op, io| + @lex_state = :EXPR_BEG + Token(op) + end + + @OP.def_rules("<<") do + |op, io| + tk = nil + if @lex_state != :EXPR_END && @lex_state != :EXPR_CLASS && + (@lex_state != :EXPR_ARG || @space_seen) + c = peek(0) + if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-") + tk = identify_here_document + end + end + unless tk + tk = Token(op) + case @lex_state + when :EXPR_FNAME, :EXPR_DOT + @lex_state = :EXPR_ARG + else + @lex_state = :EXPR_BEG + end + end + tk + end + + @OP.def_rules("'", '"') do + |op, io| + identify_string(op) + end + + @OP.def_rules("`") do + |op, io| + if @lex_state == :EXPR_FNAME + @lex_state = :EXPR_END + Token(op) + else + identify_string(op) + end + end + + @OP.def_rules('?') do + |op, io| + if @lex_state == :EXPR_END + @lex_state = :EXPR_BEG + Token(TkQUESTION) + else + ch = getc + if @lex_state == :EXPR_ARG && ch =~ /\s/ + ungetc + @lex_state = :EXPR_BEG; + Token(TkQUESTION) + else + @lex_state = :EXPR_END + Token(TkCHAR, "?#{ch}") + end + end + end + + @OP.def_rules("&", "&&", "|", "||") do + |op, io| + @lex_state = :EXPR_BEG + Token(op) + end + + @OP.def_rules("+=", "-=", "*=", "**=", + "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do + |op, io| + @lex_state = :EXPR_BEG + op =~ /^(.*)=$/ + Token(TkOPASGN, $1) + end + + @OP.def_rule("+@", proc{|op, io| @lex_state == :EXPR_FNAME}) do + |op, io| + @lex_state = :EXPR_ARG + Token(op) + end + + @OP.def_rule("-@", proc{|op, io| @lex_state == :EXPR_FNAME}) do + |op, io| + @lex_state = :EXPR_ARG + Token(op) + end + + @OP.def_rules("+", "-") do + |op, io| + catch(:RET) do + if @lex_state == :EXPR_ARG + if @space_seen and peek(0) =~ /[0-9]/ + throw :RET, identify_number(op) + else + @lex_state = :EXPR_BEG + end + elsif @lex_state != :EXPR_END and peek(0) =~ /[0-9]/ + throw :RET, identify_number(op) + else + @lex_state = :EXPR_BEG + end + Token(op) + end + end + + @OP.def_rule(".") do + |op, io| + @lex_state = :EXPR_BEG + if peek(0) =~ /[0-9]/ + ungetc + identify_number + else + # for "obj.if" etc. + @lex_state = :EXPR_DOT + Token(TkDOT) + end + end + + @OP.def_rules("..", "...") do + |op, io| + @lex_state = :EXPR_BEG + Token(op) + end + + lex_int2 + end + + def lex_int2 + @OP.def_rules("]", "}", ")") do + |op, io| + @lex_state = :EXPR_END + @indent -= 1 + @indent_stack.pop + Token(op) + end + + @OP.def_rule(":") do + |op, io| + if @lex_state == :EXPR_END || peek(0) =~ /\s/ + @lex_state = :EXPR_BEG + Token(TkCOLON) + else + @lex_state = :EXPR_FNAME; + Token(TkSYMBEG) + end + end + + @OP.def_rule("::") do + |op, io| + # p @lex_state.id2name, @space_seen + if @lex_state == :EXPR_BEG or @lex_state == :EXPR_ARG && @space_seen + @lex_state = :EXPR_BEG + Token(TkCOLON3) + else + @lex_state = :EXPR_DOT + Token(TkCOLON2) + end + end + + @OP.def_rule("/") do + |op, io| + if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID + identify_string(op) + elsif peek(0) == '=' + getc + @lex_state = :EXPR_BEG + Token(TkOPASGN, "/") #/) + elsif @lex_state == :EXPR_ARG and @space_seen and peek(0) !~ /\s/ + identify_string(op) + else + @lex_state = :EXPR_BEG + Token("/") #/) + end + end + + @OP.def_rules("^") do + |op, io| + @lex_state = :EXPR_BEG + Token("^") + end + + # @OP.def_rules("^=") do + # @lex_state = :EXPR_BEG + # Token(OP_ASGN, :^) + # end + + @OP.def_rules(",") do + |op, io| + @lex_state = :EXPR_BEG + Token(op) + end + + @OP.def_rules(";") do + |op, io| + @lex_state = :EXPR_BEG + until (@indent_stack.empty? || + [TkLPAREN, TkLBRACK, TkLBRACE, + TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last)) + @indent_stack.pop + end + Token(op) + end + + @OP.def_rule("~") do + |op, io| + @lex_state = :EXPR_BEG + Token("~") + end + + @OP.def_rule("~@", proc{|op, io| @lex_state == :EXPR_FNAME}) do + |op, io| + @lex_state = :EXPR_BEG + Token("~") + end + + @OP.def_rule("(") do + |op, io| + @indent += 1 + if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID + @lex_state = :EXPR_BEG + tk_c = TkfLPAREN + else + @lex_state = :EXPR_BEG + tk_c = TkLPAREN + end + @indent_stack.push tk_c + Token tk_c + end + + @OP.def_rule("[]", proc{|op, io| @lex_state == :EXPR_FNAME}) do + |op, io| + @lex_state = :EXPR_ARG + Token("[]") + end + + @OP.def_rule("[]=", proc{|op, io| @lex_state == :EXPR_FNAME}) do + |op, io| + @lex_state = :EXPR_ARG + Token("[]=") + end + + @OP.def_rule("[") do + |op, io| + @indent += 1 + if @lex_state == :EXPR_FNAME + tk_c = TkfLBRACK + else + if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID + tk_c = TkLBRACK + elsif @lex_state == :EXPR_ARG && @space_seen + tk_c = TkLBRACK + else + tk_c = TkfLBRACK + end + @lex_state = :EXPR_BEG + end + @indent_stack.push tk_c + Token(tk_c) + end + + @OP.def_rule("{") do + |op, io| + @indent += 1 + if @lex_state != :EXPR_END && @lex_state != :EXPR_ARG + tk_c = TkLBRACE + else + tk_c = TkfLBRACE + end + @lex_state = :EXPR_BEG + @indent_stack.push tk_c + Token(tk_c) + end + + @OP.def_rule('\\') do + |op, io| + if getc == "\n" + @space_seen = true + @continue = true + Token(TkSPACE) + else + ungetc + Token("\\") + end + end + + @OP.def_rule('%') do + |op, io| + if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID + identify_quotation + elsif peek(0) == '=' + getc + Token(TkOPASGN, :%) + elsif @lex_state == :EXPR_ARG and @space_seen and peek(0) !~ /\s/ + identify_quotation + else + @lex_state = :EXPR_BEG + Token("%") #)) + end + end + + @OP.def_rule('$') do + |op, io| + identify_gvar + end + + @OP.def_rule('@') do + |op, io| + if peek(0) =~ /[\w@]/ + ungetc + identify_identifier + else + Token("@") + end + end + + # @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do + # |op, io| + # @indent += 1 + # @lex_state = :EXPR_FNAME + # # @lex_state = :EXPR_END + # # until @rests[0] == "\n" or @rests[0] == ";" + # # rests.shift + # # end + # end + + @OP.def_rule("_") do + if peek_match?(/_END__/) and @lex_state == :EXPR_BEG then + 6.times { getc } + Token(TkEND_OF_SCRIPT, '__END__') + else + ungetc + identify_identifier + end + end + + @OP.def_rule("") do + |op, io| + printf "MATCH: start %s: %s\n", op, io.inspect if RDoc::RubyLex.debug? + if peek(0) =~ /[0-9]/ + t = identify_number + else + t = identify_identifier + end + printf "MATCH: end %s: %s\n", op, io.inspect if RDoc::RubyLex.debug? + t + end + + p @OP if RDoc::RubyLex.debug? + end + + def identify_gvar + @lex_state = :EXPR_END + + case ch = getc + when /[~_*$?!@\/\\;,=:<>".]/ #" + Token(TkGVAR, "$" + ch) + when "-" + Token(TkGVAR, "$-" + getc) + when "&", "`", "'", "+" + Token(TkBACK_REF, "$"+ch) + when /[1-9]/ + ref = ch + while (ch = getc) =~ /[0-9]/ do ref << ch end + ungetc + Token(TkNTH_REF, "$#{ref}") + when /\w/ + ungetc + ungetc + identify_identifier + else + ungetc + Token("$") + end + end + + IDENT_RE = if defined? Encoding then + eval '/[\w\u{0080}-\u{FFFFF}]/u' # 1.8 can't parse \u{} + else + /[\w\x80-\xFF]/ + end + + def identify_identifier + token = "" + if peek(0) =~ /[$@]/ + token.concat(c = getc) + if c == "@" and peek(0) == "@" + token.concat getc + end + end + + while (ch = getc) =~ IDENT_RE do + print " :#{ch}: " if RDoc::RubyLex.debug? + token.concat ch + end + + ungetc + + if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "=" + token.concat getc + end + + # almost fix token + + case token + when /^\$/ + return Token(TkGVAR, token) + when /^\@\@/ + @lex_state = :EXPR_END + # p Token(TkCVAR, token) + return Token(TkCVAR, token) + when /^\@/ + @lex_state = :EXPR_END + return Token(TkIVAR, token) + end + + if @lex_state != :EXPR_DOT + print token, "\n" if RDoc::RubyLex.debug? + + token_c, *trans = TkReading2Token[token] + if token_c + # reserved word? + + if (@lex_state != :EXPR_BEG && + @lex_state != :EXPR_FNAME && + trans[1]) + # modifiers + token_c = TkSymbol2Token[trans[1]] + @lex_state = trans[0] + else + if @lex_state != :EXPR_FNAME + if ENINDENT_CLAUSE.include?(token) + valid = peek(0) != ':' + + # check for ``class = val'' etc. + case token + when "class" + valid = false unless peek_match?(/^\s*(<<|\w|::)/) + when "def" + valid = false if peek_match?(/^\s*(([+-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/) + when "do" + valid = false if peek_match?(/^\s*([+-\/*]?=|\*|<|>|\&)/) + when *ENINDENT_CLAUSE + valid = false if peek_match?(/^\s*([+-\/*]?=|\*|<|>|\&|\|)/) + else + # no nothing + end if valid + + if valid + if token == "do" + if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last) + @indent += 1 + @indent_stack.push token_c + end + else + @indent += 1 + @indent_stack.push token_c + end + else + token_c = TkIDENTIFIER + end + + elsif DEINDENT_CLAUSE.include?(token) + @indent -= 1 + @indent_stack.pop + end + @lex_state = trans[0] + else + @lex_state = :EXPR_END + end + end + return Token(token_c, token) + end + end + + if @lex_state == :EXPR_FNAME + @lex_state = :EXPR_END + if peek(0) == '=' + token.concat getc + end + elsif @lex_state == :EXPR_BEG || @lex_state == :EXPR_DOT || + @lex_state == :EXPR_ARG + @lex_state = :EXPR_ARG + else + @lex_state = :EXPR_END + end + + if token[0, 1] =~ /[A-Z]/ + return Token(TkCONSTANT, token) + elsif token[token.size - 1, 1] =~ /[!?]/ + return Token(TkFID, token) + else + return Token(TkIDENTIFIER, token) + end + end + + def identify_here_document + ch = getc + # if lt = PERCENT_LTYPE[ch] + if ch == "-" + ch = getc + indent = true + end + if /['"`]/ =~ ch + user_quote = lt = ch + quoted = "" + while (c = getc) && c != lt + quoted.concat c + end + else + user_quote = nil + lt = '"' + quoted = ch.dup + while (c = getc) && c =~ /\w/ + quoted.concat c + end + ungetc + end + + ltback, @ltype = @ltype, lt + reserve = [] + while ch = getc + reserve.push ch + if ch == "\\" + reserve.push ch = getc + elsif ch == "\n" + break + end + end + + output_heredoc = reserve.join =~ /\A\r?\n\z/ + + if output_heredoc then + doc = '<<' + doc << '-' if indent + doc << "#{user_quote}#{quoted}#{user_quote}\n" + else + doc = '"' + end + + @here_header = false + while l = gets + l = l.sub(/(:?\r)?\n\z/, "\n") + if (indent ? l.strip : l.chomp) == quoted + break + end + doc << l + end + + if output_heredoc then + raise Error, "Missing terminating #{quoted} for string" unless l + + doc << l.chomp + else + doc << '"' + end + + @here_header = true + @here_readed.concat reserve + while ch = reserve.pop + ungetc ch + end + + token_class = output_heredoc ? RDoc::RubyLex::TkHEREDOC : Ltype2Token[lt] + @ltype = ltback + @lex_state = :EXPR_END + Token(token_class, doc) + end + + def identify_quotation + type = ch = getc + if lt = PERCENT_LTYPE[type] + ch = getc + elsif type =~ /\W/ + type = nil + lt = "\"" + else + return Token(TkMOD, '%') + end + # if ch !~ /\W/ + # ungetc + # next + # end + #@ltype = lt + @quoted = ch unless @quoted = PERCENT_PAREN[ch] + identify_string(lt, @quoted, type) + end + + def identify_number(op = "") + @lex_state = :EXPR_END + + num = op + + if peek(0) == "0" && peek(1) !~ /[.eE]/ + num << getc + + case peek(0) + when /[xX]/ + ch = getc + match = /[0-9a-fA-F_]/ + when /[bB]/ + ch = getc + match = /[01_]/ + when /[oO]/ + ch = getc + match = /[0-7_]/ + when /[dD]/ + ch = getc + match = /[0-9_]/ + when /[0-7]/ + match = /[0-7_]/ + when /[89]/ + raise Error, "Illegal octal digit" + else + return Token(TkINTEGER, num) + end + + num << ch if ch + + len0 = true + non_digit = false + while ch = getc + num << ch + if match =~ ch + if ch == "_" + if non_digit + raise Error, "trailing `#{ch}' in number" + else + non_digit = ch + end + else + non_digit = false + len0 = false + end + else + ungetc + num[-1, 1] = '' + if len0 + raise Error, "numeric literal without digits" + end + if non_digit + raise Error, "trailing `#{non_digit}' in number" + end + break + end + end + return Token(TkINTEGER, num) + end + + type = TkINTEGER + allow_point = true + allow_e = true + non_digit = false + while ch = getc + num << ch + case ch + when /[0-9]/ + non_digit = false + when "_" + non_digit = ch + when allow_point && "." + if non_digit + raise Error, "trailing `#{non_digit}' in number" + end + type = TkFLOAT + if peek(0) !~ /[0-9]/ + type = TkINTEGER + ungetc + num[-1, 1] = '' + break + end + allow_point = false + when allow_e && "e", allow_e && "E" + if non_digit + raise Error, "trailing `#{non_digit}' in number" + end + type = TkFLOAT + if peek(0) =~ /[+-]/ + num << getc + end + allow_e = false + allow_point = false + non_digit = ch + else + if non_digit + raise Error, "trailing `#{non_digit}' in number" + end + ungetc + num[-1, 1] = '' + break + end + end + + Token(type, num) + end + + def identify_string(ltype, quoted = ltype, type = nil) + close = PERCENT_PAREN.values.include?(quoted) + @ltype = ltype + @quoted = quoted + + str = if ltype == quoted and %w[" ' /].include? ltype then + ltype.dup + elsif RUBY_VERSION > '1.9' then + "%#{type or PERCENT_LTYPE.key ltype}#{PERCENT_PAREN_REV[quoted]||quoted}" + else + "%#{type or PERCENT_LTYPE.index ltype}#{PERCENT_PAREN_REV[quoted]||quoted}" + end + + subtype = nil + begin + nest = 0 + + while ch = getc + str << ch + + if @quoted == ch and nest <= 0 + break + elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#" + ch = getc + subtype = true + if ch == "{" then + str << ch << skip_inner_expression + next + else + ungetc + end + elsif ch == '\\' + if %w[' /].include? @ltype then + case ch = getc + when "\\", "\n", "'" + when @ltype + str << ch + else + ungetc + end + else + str << read_escape + end + end + + if close then + if PERCENT_PAREN[ch] == @quoted + nest += 1 + elsif ch == @quoted + nest -= 1 + end + end + end + + if @ltype == "/" + while peek(0) =~ /i|m|x|o|e|s|u|n/ + str << getc + end + end + + if subtype + Token(DLtype2Token[ltype], str) + else + Token(Ltype2Token[ltype], str) + end + ensure + @ltype = nil + @quoted = nil + @lex_state = :EXPR_END + end + end + + def skip_inner_expression + res = "" + nest = 0 + while ch = getc + res << ch + if ch == '}' + break if nest.zero? + nest -= 1 + elsif ch == '{' + nest += 1 + end + end + res + end + + def identify_comment + @ltype = "#" + + comment = '#' + + while ch = getc + # if ch == "\\" #" + # read_escape + # end + if ch == "\n" + @ltype = nil + ungetc + break + end + + comment << ch + end + + return Token(TkCOMMENT, comment) + end + + def read_escape + escape = '' + ch = getc + + case ch + when "\n", "\r", "\f" + escape << ch + when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s" #" + escape << ch + when /[0-7]/ + ungetc ch + 3.times do + ch = getc + case ch + when /[0-7]/ + escape << ch + when nil + break + else + ungetc + break + end + end + + when "x" + escape << ch + + 2.times do + ch = getc + case ch + when /[0-9a-fA-F]/ + escape << ch + when nil + break + else + ungetc + break + end + end + + when "M" + escape << ch + + ch = getc + if ch != '-' + ungetc + else + escape << ch + + ch = getc + if ch == "\\" #" + ungetc + escape << read_escape + else + escape << ch + end + end + + when "C", "c" #, "^" + escape << ch + + if ch == "C" + ch = getc + + if ch == "-" + escape << ch + ch = getc + escape << ch + + escape << read_escape if ch == "\\" + else + ungetc + end + elsif (ch = getc) == "\\" #" + escape << ch << read_escape + end + else + escape << ch + + # other characters + end + + escape + end + + # :startdoc: + +end + +#RDoc::RubyLex.debug_level = 1 + |