From fcbf63e62c627deae76c1b8cb8c0876c536ed811 Mon Sep 17 00:00:00 2001 From: Jari Vetoniemi Date: Mon, 16 Mar 2020 18:49:26 +0900 Subject: Fresh start --- jni/ruby/lib/rexml/parsers/baseparser.rb | 532 ++++++++++++++++++++ jni/ruby/lib/rexml/parsers/lightparser.rb | 58 +++ jni/ruby/lib/rexml/parsers/pullparser.rb | 196 ++++++++ jni/ruby/lib/rexml/parsers/sax2parser.rb | 272 ++++++++++ jni/ruby/lib/rexml/parsers/streamparser.rb | 52 ++ jni/ruby/lib/rexml/parsers/treeparser.rb | 100 ++++ jni/ruby/lib/rexml/parsers/ultralightparser.rb | 56 +++ jni/ruby/lib/rexml/parsers/xpathparser.rb | 656 +++++++++++++++++++++++++ 8 files changed, 1922 insertions(+) create mode 100644 jni/ruby/lib/rexml/parsers/baseparser.rb create mode 100644 jni/ruby/lib/rexml/parsers/lightparser.rb create mode 100644 jni/ruby/lib/rexml/parsers/pullparser.rb create mode 100644 jni/ruby/lib/rexml/parsers/sax2parser.rb create mode 100644 jni/ruby/lib/rexml/parsers/streamparser.rb create mode 100644 jni/ruby/lib/rexml/parsers/treeparser.rb create mode 100644 jni/ruby/lib/rexml/parsers/ultralightparser.rb create mode 100644 jni/ruby/lib/rexml/parsers/xpathparser.rb (limited to 'jni/ruby/lib/rexml/parsers') diff --git a/jni/ruby/lib/rexml/parsers/baseparser.rb b/jni/ruby/lib/rexml/parsers/baseparser.rb new file mode 100644 index 0000000..6a08b86 --- /dev/null +++ b/jni/ruby/lib/rexml/parsers/baseparser.rb @@ -0,0 +1,532 @@ +require 'rexml/parseexception' +require 'rexml/undefinednamespaceexception' +require 'rexml/source' +require 'set' + +module REXML + module Parsers + # = Using the Pull Parser + # This API is experimental, and subject to change. + # parser = PullParser.new( "texttxet" ) + # while parser.has_next? + # res = parser.next + # puts res[1]['att'] if res.start_tag? and res[0] == 'b' + # end + # See the PullEvent class for information on the content of the results. + # The data is identical to the arguments passed for the various events to + # the StreamListener API. + # + # Notice that: + # parser = PullParser.new( "BAD DOCUMENT" ) + # while parser.has_next? + # res = parser.next + # raise res[1] if res.error? + # end + # + # Nat Price gave me some good ideas for the API. + class BaseParser + LETTER = '[:alpha:]' + DIGIT = '[:digit:]' + + COMBININGCHAR = '' # TODO + EXTENDER = '' # TODO + + NCNAME_STR= "[#{LETTER}_:][-[:alnum:]._:#{COMBININGCHAR}#{EXTENDER}]*" + NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})" + UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" + + NAMECHAR = '[\-\w\.:]' + NAME = "([\\w:]#{NAMECHAR}*)" + NMTOKEN = "(?:#{NAMECHAR})+" + NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*" + REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)" + REFERENCE_RE = /#{REFERENCE}/ + + DOCTYPE_START = /\A\s*/um + DOCTYPE_PATTERN = /\s*)/um + ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um + COMMENT_START = /\A/um + CDATA_START = /\A/um + CDATA_PATTERN = //um + XMLDECL_START = /\A<\?xml\s/u; + XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um + INSTRUCTION_START = /\A<\?/u + INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um + TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um + CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um + + VERSION = /\bversion\s*=\s*["'](.*?)['"]/um + ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um + STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um + + ENTITY_START = /\A\s*/um + SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um + ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)" + NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)" + ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))" + ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})" + ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')" + DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))" + ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}" + ATTDEF_RE = /#{ATTDEF}/ + ATTLISTDECL_START = /\A\s*/um + NOTATIONDECL_START = /\A\s*/um + SYSTEM = /\A\s*/um + + TEXT_PATTERN = /\A([^<]*)/um + + # Entity constants + PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#" + SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))} + PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')} + EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))" + NDATADECL = "\\s+NDATA\\s+#{NAME}" + PEREFERENCE = "%#{NAME};" + ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))} + PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})" + ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))" + PEDECL = "" + GEDECL = "" + ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um + + EREFERENCE = /&(?!#{NAME};)/ + + DEFAULT_ENTITIES = { + 'gt' => [/>/, '>', '>', />/], + 'lt' => [/</, '<', '<', / [/"/, '"', '"', /"/], + "apos" => [/'/, "'", "'", /'/] + } + + + ###################################################################### + # These are patterns to identify common markup errors, to make the + # error messages more informative. + ###################################################################### + MISSING_ATTRIBUTE_QUOTES = /^<#{NAME_STR}\s+#{NAME_STR}\s*=\s*[^"']/um + + def initialize( source ) + self.stream = source + @listeners = [] + end + + def add_listener( listener ) + @listeners << listener + end + + attr_reader :source + + def stream=( source ) + @source = SourceFactory.create_from( source ) + @closed = nil + @document_status = nil + @tags = [] + @stack = [] + @entities = [] + @nsstack = [] + end + + def position + if @source.respond_to? :position + @source.position + else + # FIXME + 0 + end + end + + # Returns true if there are no more events + def empty? + return (@source.empty? and @stack.empty?) + end + + # Returns true if there are more events. Synonymous with !empty? + def has_next? + return !(@source.empty? and @stack.empty?) + end + + # Push an event back on the head of the stream. This method + # has (theoretically) infinite depth. + def unshift token + @stack.unshift(token) + end + + # Peek at the +depth+ event in the stack. The first element on the stack + # is at depth 0. If +depth+ is -1, will parse to the end of the input + # stream and return the last event, which is always :end_document. + # Be aware that this causes the stream to be parsed up to the +depth+ + # event, so you can effectively pre-parse the entire document (pull the + # entire thing into memory) using this method. + def peek depth=0 + raise %Q[Illegal argument "#{depth}"] if depth < -1 + temp = [] + if depth == -1 + temp.push(pull()) until empty? + else + while @stack.size+temp.size < depth+1 + temp.push(pull()) + end + end + @stack += temp if temp.size > 0 + @stack[depth] + end + + # Returns the next event. This is a +PullEvent+ object. + def pull + pull_event.tap do |event| + @listeners.each do |listener| + listener.receive event + end + end + end + + def pull_event + if @closed + x, @closed = @closed, nil + return [ :end_element, x ] + end + return [ :end_document ] if empty? + return @stack.shift if @stack.size > 0 + #STDERR.puts @source.encoding + @source.read if @source.buffer.size<2 + #STDERR.puts "BUFFER = #{@source.buffer.inspect}" + if @document_status == nil + #@source.consume( /^\s*/um ) + word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um ) + word = word[1] unless word.nil? + #STDERR.puts "WORD = #{word.inspect}" + case word + when COMMENT_START + return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ] + when XMLDECL_START + #STDERR.puts "XMLDECL" + results = @source.match( XMLDECL_PATTERN, true )[1] + version = VERSION.match( results ) + version = version[1] unless version.nil? + encoding = ENCODING.match(results) + encoding = encoding[1] unless encoding.nil? + if need_source_encoding_update?(encoding) + @source.encoding = encoding + end + if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding + encoding = "UTF-16" + end + standalone = STANDALONE.match(results) + standalone = standalone[1] unless standalone.nil? + return [ :xmldecl, version, encoding, standalone ] + when INSTRUCTION_START + return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] + when DOCTYPE_START + md = @source.match( DOCTYPE_PATTERN, true ) + @nsstack.unshift(curr_ns=Set.new) + identity = md[1] + close = md[2] + identity =~ IDENTITY + name = $1 + raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil? + pub_sys = $2.nil? ? nil : $2.strip + long_name = $4.nil? ? nil : $4.strip + uri = $6.nil? ? nil : $6.strip + args = [ :start_doctype, name, pub_sys, long_name, uri ] + if close == ">" + @document_status = :after_doctype + @source.read if @source.buffer.size<2 + md = @source.match(/^\s*/um, true) + @stack << [ :end_doctype ] + else + @document_status = :in_doctype + end + return args + when /^\s+/ + else + @document_status = :after_doctype + @source.read if @source.buffer.size<2 + md = @source.match(/\s*/um, true) + if @source.encoding == "UTF-8" + @source.buffer.force_encoding(::Encoding::UTF_8) + end + end + end + if @document_status == :in_doctype + md = @source.match(/\s*(.*?>)/um) + case md[1] + when SYSTEMENTITY + match = @source.match( SYSTEMENTITY, true )[1] + return [ :externalentity, match ] + + when ELEMENTDECL_START + return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ] + + when ENTITY_START + match = @source.match( ENTITYDECL, true ).to_a.compact + match[0] = :entitydecl + ref = false + if match[1] == '%' + ref = true + match.delete_at 1 + end + # Now we have to sort out what kind of entity reference this is + if match[2] == 'SYSTEM' + # External reference + match[3] = match[3][1..-2] # PUBID + match.delete_at(4) if match.size > 4 # Chop out NDATA decl + # match is [ :entity, name, SYSTEM, pubid(, ndata)? ] + elsif match[2] == 'PUBLIC' + # External reference + match[3] = match[3][1..-2] # PUBID + match[4] = match[4][1..-2] # HREF + match.delete_at(5) if match.size > 5 # Chop out NDATA decl + # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ] + else + match[2] = match[2][1..-2] + match.pop if match.size == 4 + # match is [ :entity, name, value ] + end + match << '%' if ref + return match + when ATTLISTDECL_START + md = @source.match( ATTLISTDECL_PATTERN, true ) + raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil? + element = md[1] + contents = md[0] + + pairs = {} + values = md[0].scan( ATTDEF_RE ) + values.each do |attdef| + unless attdef[3] == "#IMPLIED" + attdef.compact! + val = attdef[3] + val = attdef[4] if val == "#FIXED " + pairs[attdef[0]] = val + if attdef[0] =~ /^xmlns:(.*)/ + @nsstack[0] << $1 + end + end + end + return [ :attlistdecl, element, pairs, contents ] + when NOTATIONDECL_START + md = nil + if @source.match( PUBLIC ) + md = @source.match( PUBLIC, true ) + vals = [md[1],md[2],md[4],md[6]] + elsif @source.match( SYSTEM ) + md = @source.match( SYSTEM, true ) + vals = [md[1],md[2],nil,md[4]] + else + raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source ) + end + return [ :notationdecl, *vals ] + when DOCTYPE_END + @document_status = :after_doctype + @source.match( DOCTYPE_END, true ) + return [ :end_doctype ] + end + end + begin + if @source.buffer[0] == ?< + if @source.buffer[1] == ?/ + @nsstack.shift + last_tag = @tags.pop + #md = @source.match_to_consume( '>', CLOSE_MATCH) + md = @source.match( CLOSE_MATCH, true ) + raise REXML::ParseException.new( "Missing end tag for "+ + "'#{last_tag}' (got \"#{md[1]}\")", + @source) unless last_tag == md[1] + return [ :end_element, last_tag ] + elsif @source.buffer[1] == ?! + md = @source.match(/\A(\s*[^>]*>)/um) + #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" + raise REXML::ParseException.new("Malformed node", @source) unless md + if md[0][2] == ?- + md = @source.match( COMMENT_PATTERN, true ) + + case md[1] + when /--/, /-\z/ + raise REXML::ParseException.new("Malformed comment", @source) + end + + return [ :comment, md[1] ] if md + else + md = @source.match( CDATA_PATTERN, true ) + return [ :cdata, md[1] ] if md + end + raise REXML::ParseException.new( "Declarations can only occur "+ + "in the doctype declaration.", @source) + elsif @source.buffer[1] == ?? + md = @source.match( INSTRUCTION_PATTERN, true ) + return [ :processing_instruction, md[1], md[2] ] if md + raise REXML::ParseException.new( "Bad instruction declaration", + @source) + else + # Get the next tag + md = @source.match(TAG_MATCH, true) + unless md + # Check for missing attribute quotes + raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES ) + raise REXML::ParseException.new("malformed XML: missing tag start", @source) + end + attributes = {} + prefixes = Set.new + prefixes << md[2] if md[2] + @nsstack.unshift(curr_ns=Set.new) + if md[4].size > 0 + attrs = md[4].scan( ATTRIBUTE_PATTERN ) + raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0 + attrs.each do |attr_name, prefix, local_part, quote, value| + if prefix == "xmlns" + if local_part == "xml" + if value != "http://www.w3.org/XML/1998/namespace" + msg = "The 'xml' prefix must not be bound to any other namespace "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self ) + end + elsif local_part == "xmlns" + msg = "The 'xmlns' prefix must not be declared "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self) + end + curr_ns << local_part + elsif prefix + prefixes << prefix unless prefix == "xml" + end + + if attributes.has_key?(attr_name) + msg = "Duplicate attribute #{attr_name.inspect}" + raise REXML::ParseException.new(msg, @source, self) + end + + attributes[attr_name] = value + end + end + + # Verify that all of the prefixes have been defined + for prefix in prefixes + unless @nsstack.find{|k| k.member?(prefix)} + raise UndefinedNamespaceException.new(prefix,@source,self) + end + end + + if md[6] + @closed = md[1] + @nsstack.shift + else + @tags.push( md[1] ) + end + return [ :start_element, md[1], attributes ] + end + else + md = @source.match( TEXT_PATTERN, true ) + if md[0].length == 0 + @source.match( /(\s+)/, true ) + end + #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0 + #return [ :text, "" ] if md[0].length == 0 + # unnormalized = Text::unnormalize( md[1], self ) + # return PullEvent.new( :text, md[1], unnormalized ) + return [ :text, md[1] ] + end + rescue REXML::UndefinedNamespaceException + raise + rescue REXML::ParseException + raise + rescue Exception, NameError => error + raise REXML::ParseException.new( "Exception parsing", + @source, self, (error ? error : $!) ) + end + return [ :dummy ] + end + private :pull_event + + def entity( reference, entities ) + value = nil + value = entities[ reference ] if entities + if not value + value = DEFAULT_ENTITIES[ reference ] + value = value[2] if value + end + unnormalize( value, entities ) if value + end + + # Escapes all possible entities + def normalize( input, entities=nil, entity_filter=nil ) + copy = input.clone + # Doing it like this rather than in a loop improves the speed + copy.gsub!( EREFERENCE, '&' ) + entities.each do |key, value| + copy.gsub!( value, "&#{key};" ) unless entity_filter and + entity_filter.include?(entity) + end if entities + copy.gsub!( EREFERENCE, '&' ) + DEFAULT_ENTITIES.each do |key, value| + copy.gsub!( value[3], value[1] ) + end + copy + end + + # Unescapes all possible entities + def unnormalize( string, entities=nil, filter=nil ) + rv = string.clone + rv.gsub!( /\r\n?/, "\n" ) + matches = rv.scan( REFERENCE_RE ) + return rv if matches.size == 0 + rv.gsub!( /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) { + m=$1 + m = "0#{m}" if m[0] == ?x + [Integer(m)].pack('U*') + } + matches.collect!{|x|x[0]}.compact! + if matches.size > 0 + matches.each do |entity_reference| + unless filter and filter.include?(entity_reference) + entity_value = entity( entity_reference, entities ) + if entity_value + re = /&#{entity_reference};/ + rv.gsub!( re, entity_value ) + else + er = DEFAULT_ENTITIES[entity_reference] + rv.gsub!( er[0], er[2] ) if er + end + end + end + rv.gsub!( /&/, '&' ) + end + rv + end + + private + def need_source_encoding_update?(xml_declaration_encoding) + return false if xml_declaration_encoding.nil? + return false if /\AUTF-16\z/i =~ xml_declaration_encoding + true + end + end + end +end + +=begin + case event[0] + when :start_element + when :text + when :end_element + when :processing_instruction + when :cdata + when :comment + when :xmldecl + when :start_doctype + when :end_doctype + when :externalentity + when :elementdecl + when :entity + when :attlistdecl + when :notationdecl + when :end_doctype + end +=end diff --git a/jni/ruby/lib/rexml/parsers/lightparser.rb b/jni/ruby/lib/rexml/parsers/lightparser.rb new file mode 100644 index 0000000..8104168 --- /dev/null +++ b/jni/ruby/lib/rexml/parsers/lightparser.rb @@ -0,0 +1,58 @@ +require 'rexml/parsers/streamparser' +require 'rexml/parsers/baseparser' +require 'rexml/light/node' + +module REXML + module Parsers + class LightParser + def initialize stream + @stream = stream + @parser = REXML::Parsers::BaseParser.new( stream ) + end + + def add_listener( listener ) + @parser.add_listener( listener ) + end + + def rewind + @stream.rewind + @parser.stream = @stream + end + + def parse + root = context = [ :document ] + while true + event = @parser.pull + case event[0] + when :end_document + break + when :start_element, :start_doctype + new_node = event + context << new_node + new_node[1,0] = [context] + context = new_node + when :end_element, :end_doctype + context = context[1] + else + new_node = event + context << new_node + new_node[1,0] = [context] + end + end + root + end + end + + # An element is an array. The array contains: + # 0 The parent element + # 1 The tag name + # 2 A hash of attributes + # 3..-1 The child elements + # An element is an array of size > 3 + # Text is a String + # PIs are [ :processing_instruction, target, data ] + # Comments are [ :comment, data ] + # DocTypes are DocType structs + # The root is an array with XMLDecls, Text, DocType, Array, Text + end +end diff --git a/jni/ruby/lib/rexml/parsers/pullparser.rb b/jni/ruby/lib/rexml/parsers/pullparser.rb new file mode 100644 index 0000000..68a4ff7 --- /dev/null +++ b/jni/ruby/lib/rexml/parsers/pullparser.rb @@ -0,0 +1,196 @@ +require 'forwardable' + +require 'rexml/parseexception' +require 'rexml/parsers/baseparser' +require 'rexml/xmltokens' + +module REXML + module Parsers + # = Using the Pull Parser + # This API is experimental, and subject to change. + # parser = PullParser.new( "texttxet" ) + # while parser.has_next? + # res = parser.next + # puts res[1]['att'] if res.start_tag? and res[0] == 'b' + # end + # See the PullEvent class for information on the content of the results. + # The data is identical to the arguments passed for the various events to + # the StreamListener API. + # + # Notice that: + # parser = PullParser.new( "BAD DOCUMENT" ) + # while parser.has_next? + # res = parser.next + # raise res[1] if res.error? + # end + # + # Nat Price gave me some good ideas for the API. + class PullParser + include XMLTokens + extend Forwardable + + def_delegators( :@parser, :has_next? ) + def_delegators( :@parser, :entity ) + def_delegators( :@parser, :empty? ) + def_delegators( :@parser, :source ) + + def initialize stream + @entities = {} + @listeners = nil + @parser = BaseParser.new( stream ) + @my_stack = [] + end + + def add_listener( listener ) + @listeners = [] unless @listeners + @listeners << listener + end + + def each + while has_next? + yield self.pull + end + end + + def peek depth=0 + if @my_stack.length <= depth + (depth - @my_stack.length + 1).times { + e = PullEvent.new(@parser.pull) + @my_stack.push(e) + } + end + @my_stack[depth] + end + + def pull + return @my_stack.shift if @my_stack.length > 0 + + event = @parser.pull + case event[0] + when :entitydecl + @entities[ event[1] ] = + event[2] unless event[2] =~ /PUBLIC|SYSTEM/ + when :text + unnormalized = @parser.unnormalize( event[1], @entities ) + event << unnormalized + end + PullEvent.new( event ) + end + + def unshift token + @my_stack.unshift token + end + end + + # A parsing event. The contents of the event are accessed as an +Array?, + # and the type is given either by the ...? methods, or by accessing the + # +type+ accessor. The contents of this object vary from event to event, + # but are identical to the arguments passed to +StreamListener+s for each + # event. + class PullEvent + # The type of this event. Will be one of :tag_start, :tag_end, :text, + # :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl, + # :notationdecl, :entity, :cdata, :xmldecl, or :error. + def initialize(arg) + @contents = arg + end + + def []( start, endd=nil) + if start.kind_of? Range + @contents.slice( start.begin+1 .. start.end ) + elsif start.kind_of? Numeric + if endd.nil? + @contents.slice( start+1 ) + else + @contents.slice( start+1, endd ) + end + else + raise "Illegal argument #{start.inspect} (#{start.class})" + end + end + + def event_type + @contents[0] + end + + # Content: [ String tag_name, Hash attributes ] + def start_element? + @contents[0] == :start_element + end + + # Content: [ String tag_name ] + def end_element? + @contents[0] == :end_element + end + + # Content: [ String raw_text, String unnormalized_text ] + def text? + @contents[0] == :text + end + + # Content: [ String text ] + def instruction? + @contents[0] == :processing_instruction + end + + # Content: [ String text ] + def comment? + @contents[0] == :comment + end + + # Content: [ String name, String pub_sys, String long_name, String uri ] + def doctype? + @contents[0] == :start_doctype + end + + # Content: [ String text ] + def attlistdecl? + @contents[0] == :attlistdecl + end + + # Content: [ String text ] + def elementdecl? + @contents[0] == :elementdecl + end + + # Due to the wonders of DTDs, an entity declaration can be just about + # anything. There's no way to normalize it; you'll have to interpret the + # content yourself. However, the following is true: + # + # * If the entity declaration is an internal entity: + # [ String name, String value ] + # Content: [ String text ] + def entitydecl? + @contents[0] == :entitydecl + end + + # Content: [ String text ] + def notationdecl? + @contents[0] == :notationdecl + end + + # Content: [ String text ] + def entity? + @contents[0] == :entity + end + + # Content: [ String text ] + def cdata? + @contents[0] == :cdata + end + + # Content: [ String version, String encoding, String standalone ] + def xmldecl? + @contents[0] == :xmldecl + end + + def error? + @contents[0] == :error + end + + def inspect + @contents[0].to_s + ": " + @contents[1..-1].inspect + end + end + end +end diff --git a/jni/ruby/lib/rexml/parsers/sax2parser.rb b/jni/ruby/lib/rexml/parsers/sax2parser.rb new file mode 100644 index 0000000..a72c0a7 --- /dev/null +++ b/jni/ruby/lib/rexml/parsers/sax2parser.rb @@ -0,0 +1,272 @@ +require 'rexml/parsers/baseparser' +require 'rexml/parseexception' +require 'rexml/namespace' +require 'rexml/text' + +module REXML + module Parsers + # SAX2Parser + class SAX2Parser + def initialize source + @parser = BaseParser.new(source) + @listeners = [] + @procs = [] + @namespace_stack = [] + @has_listeners = false + @tag_stack = [] + @entities = {} + end + + def source + @parser.source + end + + def add_listener( listener ) + @parser.add_listener( listener ) + end + + # Listen arguments: + # + # Symbol, Array, Block + # Listen to Symbol events on Array elements + # Symbol, Block + # Listen to Symbol events + # Array, Listener + # Listen to all events on Array elements + # Array, Block + # Listen to :start_element events on Array elements + # Listener + # Listen to All events + # + # Symbol can be one of: :start_element, :end_element, + # :start_prefix_mapping, :end_prefix_mapping, :characters, + # :processing_instruction, :doctype, :attlistdecl, :elementdecl, + # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment + # + # There is an additional symbol that can be listened for: :progress. + # This will be called for every event generated, passing in the current + # stream position. + # + # Array contains regular expressions or strings which will be matched + # against fully qualified element names. + # + # Listener must implement the methods in SAX2Listener + # + # Block will be passed the same arguments as a SAX2Listener method would + # be, where the method name is the same as the matched Symbol. + # See the SAX2Listener for more information. + def listen( *args, &blok ) + if args[0].kind_of? Symbol + if args.size == 2 + args[1].each { |match| @procs << [args[0], match, blok] } + else + add( [args[0], nil, blok] ) + end + elsif args[0].kind_of? Array + if args.size == 2 + args[0].each { |match| add( [nil, match, args[1]] ) } + else + args[0].each { |match| add( [ :start_element, match, blok ] ) } + end + else + add([nil, nil, args[0]]) + end + end + + def deafen( listener=nil, &blok ) + if listener + @listeners.delete_if {|item| item[-1] == listener } + @has_listeners = false if @listeners.size == 0 + else + @procs.delete_if {|item| item[-1] == blok } + end + end + + def parse + @procs.each { |sym,match,block| block.call if sym == :start_document } + @listeners.each { |sym,match,block| + block.start_document if sym == :start_document or sym.nil? + } + context = [] + while true + event = @parser.pull + case event[0] + when :end_document + handle( :end_document ) + break + when :start_doctype + handle( :doctype, *event[1..-1]) + when :end_doctype + context = context[1] + when :start_element + @tag_stack.push(event[1]) + # find the observers for namespaces + procs = get_procs( :start_prefix_mapping, event[1] ) + listeners = get_listeners( :start_prefix_mapping, event[1] ) + if procs or listeners + # break out the namespace declarations + # The attributes live in event[2] + event[2].each {|n, v| event[2][n] = @parser.normalize(v)} + nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ } + nsdecl.collect! { |n, value| [ n[6..-1], value ] } + @namespace_stack.push({}) + nsdecl.each do |n,v| + @namespace_stack[-1][n] = v + # notify observers of namespaces + procs.each { |ob| ob.call( n, v ) } if procs + listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners + end + end + event[1] =~ Namespace::NAMESPLIT + prefix = $1 + local = $2 + uri = get_namespace(prefix) + # find the observers for start_element + procs = get_procs( :start_element, event[1] ) + listeners = get_listeners( :start_element, event[1] ) + # notify observers + procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs + listeners.each { |ob| + ob.start_element( uri, local, event[1], event[2] ) + } if listeners + when :end_element + @tag_stack.pop + event[1] =~ Namespace::NAMESPLIT + prefix = $1 + local = $2 + uri = get_namespace(prefix) + # find the observers for start_element + procs = get_procs( :end_element, event[1] ) + listeners = get_listeners( :end_element, event[1] ) + # notify observers + procs.each { |ob| ob.call( uri, local, event[1] ) } if procs + listeners.each { |ob| + ob.end_element( uri, local, event[1] ) + } if listeners + + namespace_mapping = @namespace_stack.pop + # find the observers for namespaces + procs = get_procs( :end_prefix_mapping, event[1] ) + listeners = get_listeners( :end_prefix_mapping, event[1] ) + if procs or listeners + namespace_mapping.each do |ns_prefix, ns_uri| + # notify observers of namespaces + procs.each { |ob| ob.call( ns_prefix ) } if procs + listeners.each { |ob| ob.end_prefix_mapping(ns_prefix) } if listeners + end + end + when :text + #normalized = @parser.normalize( event[1] ) + #handle( :characters, normalized ) + copy = event[1].clone + + esub = proc { |match| + if @entities.has_key?($1) + @entities[$1].gsub(Text::REFERENCE, &esub) + else + match + end + } + + copy.gsub!( Text::REFERENCE, &esub ) + copy.gsub!( Text::NUMERICENTITY ) {|m| + m=$1 + m = "0#{m}" if m[0] == ?x + [Integer(m)].pack('U*') + } + handle( :characters, copy ) + when :entitydecl + handle_entitydecl( event ) + when :processing_instruction, :comment, :attlistdecl, + :elementdecl, :cdata, :notationdecl, :xmldecl + handle( *event ) + end + handle( :progress, @parser.position ) + end + end + + private + def handle( symbol, *arguments ) + tag = @tag_stack[-1] + procs = get_procs( symbol, tag ) + listeners = get_listeners( symbol, tag ) + # notify observers + procs.each { |ob| ob.call( *arguments ) } if procs + listeners.each { |l| + l.send( symbol.to_s, *arguments ) + } if listeners + end + + def handle_entitydecl( event ) + @entities[ event[1] ] = event[2] if event.size == 3 + parameter_reference_p = false + case event[2] + when "SYSTEM" + if event.size == 5 + if event.last == "%" + parameter_reference_p = true + else + event[4, 0] = "NDATA" + end + end + when "PUBLIC" + if event.size == 6 + if event.last == "%" + parameter_reference_p = true + else + event[5, 0] = "NDATA" + end + end + else + parameter_reference_p = (event.size == 4) + end + event[1, 0] = event.pop if parameter_reference_p + handle( event[0], event[1..-1] ) + end + + # The following methods are duplicates, but it is faster than using + # a helper + def get_procs( symbol, name ) + return nil if @procs.size == 0 + @procs.find_all do |sym, match, block| + ( + (sym.nil? or symbol == sym) and + ((name.nil? and match.nil?) or match.nil? or ( + (name == match) or + (match.kind_of? Regexp and name =~ match) + ) + ) + ) + end.collect{|x| x[-1]} + end + def get_listeners( symbol, name ) + return nil if @listeners.size == 0 + @listeners.find_all do |sym, match, block| + ( + (sym.nil? or symbol == sym) and + ((name.nil? and match.nil?) or match.nil? or ( + (name == match) or + (match.kind_of? Regexp and name =~ match) + ) + ) + ) + end.collect{|x| x[-1]} + end + + def add( pair ) + if pair[-1].respond_to? :call + @procs << pair unless @procs.include? pair + else + @listeners << pair unless @listeners.include? pair + @has_listeners = true + end + end + + def get_namespace( prefix ) + uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) || + (@namespace_stack.find { |ns| not ns[nil].nil? }) + uris[-1][prefix] unless uris.nil? or 0 == uris.size + end + end + end +end diff --git a/jni/ruby/lib/rexml/parsers/streamparser.rb b/jni/ruby/lib/rexml/parsers/streamparser.rb new file mode 100644 index 0000000..9ea65ed --- /dev/null +++ b/jni/ruby/lib/rexml/parsers/streamparser.rb @@ -0,0 +1,52 @@ +require "rexml/parsers/baseparser" + +module REXML + module Parsers + class StreamParser + def initialize source, listener + @listener = listener + @parser = BaseParser.new( source ) + end + + def add_listener( listener ) + @parser.add_listener( listener ) + end + + def parse + # entity string + while true + event = @parser.pull + case event[0] + when :end_document + return + when :start_element + attrs = event[2].each do |n, v| + event[2][n] = @parser.unnormalize( v ) + end + @listener.tag_start( event[1], attrs ) + when :end_element + @listener.tag_end( event[1] ) + when :text + normalized = @parser.unnormalize( event[1] ) + @listener.text( normalized ) + when :processing_instruction + @listener.instruction( *event[1,2] ) + when :start_doctype + @listener.doctype( *event[1..-1] ) + when :end_doctype + # FIXME: remove this condition for milestone:3.2 + @listener.doctype_end if @listener.respond_to? :doctype_end + when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl + @listener.send( event[0].to_s, *event[1..-1] ) + when :entitydecl, :notationdecl + @listener.send( event[0].to_s, event[1..-1] ) + when :externalentity + entity_reference = event[1] + content = entity_reference.gsub(/\A%|;\z/, "") + @listener.entity(content) + end + end + end + end + end +end diff --git a/jni/ruby/lib/rexml/parsers/treeparser.rb b/jni/ruby/lib/rexml/parsers/treeparser.rb new file mode 100644 index 0000000..68edb77 --- /dev/null +++ b/jni/ruby/lib/rexml/parsers/treeparser.rb @@ -0,0 +1,100 @@ +require 'rexml/validation/validationexception' +require 'rexml/undefinednamespaceexception' + +module REXML + module Parsers + class TreeParser + def initialize( source, build_context = Document.new ) + @build_context = build_context + @parser = Parsers::BaseParser.new( source ) + end + + def add_listener( listener ) + @parser.add_listener( listener ) + end + + def parse + tag_stack = [] + in_doctype = false + entities = nil + begin + while true + event = @parser.pull + #STDERR.puts "TREEPARSER GOT #{event.inspect}" + case event[0] + when :end_document + unless tag_stack.empty? + raise ParseException.new("No close tag for #{@build_context.xpath}", + @parser.source, @parser) + end + return + when :start_element + tag_stack.push(event[1]) + el = @build_context = @build_context.add_element( event[1] ) + event[2].each do |key, value| + el.attributes[key]=Attribute.new(key,value,self) + end + when :end_element + tag_stack.pop + @build_context = @build_context.parent + when :text + if not in_doctype + if @build_context[-1].instance_of? Text + @build_context[-1] << event[1] + else + @build_context.add( + Text.new(event[1], @build_context.whitespace, nil, true) + ) unless ( + @build_context.ignore_whitespace_nodes and + event[1].strip.size==0 + ) + end + end + when :comment + c = Comment.new( event[1] ) + @build_context.add( c ) + when :cdata + c = CData.new( event[1] ) + @build_context.add( c ) + when :processing_instruction + @build_context.add( Instruction.new( event[1], event[2] ) ) + when :end_doctype + in_doctype = false + entities.each { |k,v| entities[k] = @build_context.entities[k].value } + @build_context = @build_context.parent + when :start_doctype + doctype = DocType.new( event[1..-1], @build_context ) + @build_context = doctype + entities = {} + in_doctype = true + when :attlistdecl + n = AttlistDecl.new( event[1..-1] ) + @build_context.add( n ) + when :externalentity + n = ExternalEntity.new( event[1] ) + @build_context.add( n ) + when :elementdecl + n = ElementDecl.new( event[1] ) + @build_context.add(n) + when :entitydecl + entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/ + @build_context.add(Entity.new(event)) + when :notationdecl + n = NotationDecl.new( *event[1..-1] ) + @build_context.add( n ) + when :xmldecl + x = XMLDecl.new( event[1], event[2], event[3] ) + @build_context.add( x ) + end + end + rescue REXML::Validation::ValidationException + raise + rescue REXML::ParseException + raise + rescue + raise ParseException.new( $!.message, @parser.source, @parser, $! ) + end + end + end + end +end diff --git a/jni/ruby/lib/rexml/parsers/ultralightparser.rb b/jni/ruby/lib/rexml/parsers/ultralightparser.rb new file mode 100644 index 0000000..4e2d7a8 --- /dev/null +++ b/jni/ruby/lib/rexml/parsers/ultralightparser.rb @@ -0,0 +1,56 @@ +require 'rexml/parsers/streamparser' +require 'rexml/parsers/baseparser' + +module REXML + module Parsers + class UltraLightParser + def initialize stream + @stream = stream + @parser = REXML::Parsers::BaseParser.new( stream ) + end + + def add_listener( listener ) + @parser.add_listener( listener ) + end + + def rewind + @stream.rewind + @parser.stream = @stream + end + + def parse + root = context = [] + while true + event = @parser.pull + case event[0] + when :end_document + break + when :end_doctype + context = context[1] + when :start_element, :start_doctype + context << event + event[1,0] = [context] + context = event + when :end_element + context = context[1] + else + context << event + end + end + root + end + end + + # An element is an array. The array contains: + # 0 The parent element + # 1 The tag name + # 2 A hash of attributes + # 3..-1 The child elements + # An element is an array of size > 3 + # Text is a String + # PIs are [ :processing_instruction, target, data ] + # Comments are [ :comment, data ] + # DocTypes are DocType structs + # The root is an array with XMLDecls, Text, DocType, Array, Text + end +end diff --git a/jni/ruby/lib/rexml/parsers/xpathparser.rb b/jni/ruby/lib/rexml/parsers/xpathparser.rb new file mode 100644 index 0000000..57767fb --- /dev/null +++ b/jni/ruby/lib/rexml/parsers/xpathparser.rb @@ -0,0 +1,656 @@ +require 'rexml/namespace' +require 'rexml/xmltokens' + +module REXML + module Parsers + # You don't want to use this class. Really. Use XPath, which is a wrapper + # for this class. Believe me. You don't want to poke around in here. + # There is strange, dark magic at work in this code. Beware. Go back! Go + # back while you still can! + class XPathParser + include XMLTokens + LITERAL = /^'([^']*)'|^"([^"]*)"/u + + def namespaces=( namespaces ) + Functions::namespace_context = namespaces + @namespaces = namespaces + end + + def parse path + path = path.dup + path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces + path.gsub!( /\s+([\]\)])/, '\1') + parsed = [] + OrExpr(path, parsed) + parsed + end + + def predicate path + parsed = [] + Predicate( "[#{path}]", parsed ) + parsed + end + + def abbreviate( path ) + path = path.kind_of?(String) ? parse( path ) : path + string = "" + document = false + while path.size > 0 + op = path.shift + case op + when :node + when :attribute + string << "/" if string.size > 0 + string << "@" + when :child + string << "/" if string.size > 0 + when :descendant_or_self + string << "/" + when :self + string << "." + when :parent + string << ".." + when :any + string << "*" + when :text + string << "text()" + when :following, :following_sibling, + :ancestor, :ancestor_or_self, :descendant, + :namespace, :preceding, :preceding_sibling + string << "/" unless string.size == 0 + string << op.to_s.tr("_", "-") + string << "::" + when :qname + prefix = path.shift + name = path.shift + string << prefix+":" if prefix.size > 0 + string << name + when :predicate + string << '[' + string << predicate_to_string( path.shift ) {|x| abbreviate( x ) } + string << ']' + when :document + document = true + when :function + string << path.shift + string << "( " + string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )} + string << " )" + when :literal + string << %Q{ "#{path.shift}" } + else + string << "/" unless string.size == 0 + string << "UNKNOWN(" + string << op.inspect + string << ")" + end + end + string = "/"+string if document + return string + end + + def expand( path ) + path = path.kind_of?(String) ? parse( path ) : path + string = "" + document = false + while path.size > 0 + op = path.shift + case op + when :node + string << "node()" + when :attribute, :child, :following, :following_sibling, + :ancestor, :ancestor_or_self, :descendant, :descendant_or_self, + :namespace, :preceding, :preceding_sibling, :self, :parent + string << "/" unless string.size == 0 + string << op.to_s.tr("_", "-") + string << "::" + when :any + string << "*" + when :qname + prefix = path.shift + name = path.shift + string << prefix+":" if prefix.size > 0 + string << name + when :predicate + string << '[' + string << predicate_to_string( path.shift ) { |x| expand(x) } + string << ']' + when :document + document = true + else + string << "/" unless string.size == 0 + string << "UNKNOWN(" + string << op.inspect + string << ")" + end + end + string = "/"+string if document + return string + end + + def predicate_to_string( path, &block ) + string = "" + case path[0] + when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union + op = path.shift + case op + when :eq + op = "=" + when :lt + op = "<" + when :gt + op = ">" + when :lteq + op = "<=" + when :gteq + op = ">=" + when :neq + op = "!=" + when :union + op = "|" + end + left = predicate_to_string( path.shift, &block ) + right = predicate_to_string( path.shift, &block ) + string << " " + string << left + string << " " + string << op.to_s + string << " " + string << right + string << " " + when :function + path.shift + name = path.shift + string << name + string << "( " + string << predicate_to_string( path.shift, &block ) + string << " )" + when :literal + path.shift + string << " " + string << path.shift.inspect + string << " " + else + string << " " + string << yield( path ) + string << " " + end + return string.squeeze(" ") + end + + private + #LocationPath + # | RelativeLocationPath + # | '/' RelativeLocationPath? + # | '//' RelativeLocationPath + def LocationPath path, parsed + path = path.strip + if path[0] == ?/ + parsed << :document + if path[1] == ?/ + parsed << :descendant_or_self + parsed << :node + path = path[2..-1] + else + path = path[1..-1] + end + end + return RelativeLocationPath( path, parsed ) if path.size > 0 + end + + #RelativeLocationPath + # | Step + # | (AXIS_NAME '::' | '@' | '') AxisSpecifier + # NodeTest + # Predicate + # | '.' | '..' AbbreviatedStep + # | RelativeLocationPath '/' Step + # | RelativeLocationPath '//' Step + AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/ + def RelativeLocationPath path, parsed + while path.size > 0 + # (axis or @ or ) nodetest predicate > + # OR > / Step + # (. or ..) > + if path[0] == ?. + if path[1] == ?. + parsed << :parent + parsed << :node + path = path[2..-1] + else + parsed << :self + parsed << :node + path = path[1..-1] + end + else + if path[0] == ?@ + parsed << :attribute + path = path[1..-1] + # Goto Nodetest + elsif path =~ AXIS + parsed << $1.tr('-','_').intern + path = $' + # Goto Nodetest + else + parsed << :child + end + + n = [] + path = NodeTest( path, n) + + if path[0] == ?[ + path = Predicate( path, n ) + end + + parsed.concat(n) + end + + if path.size > 0 + if path[0] == ?/ + if path[1] == ?/ + parsed << :descendant_or_self + parsed << :node + path = path[2..-1] + else + path = path[1..-1] + end + else + return path + end + end + end + return path + end + + # Returns a 1-1 map of the nodeset + # The contents of the resulting array are either: + # true/false, if a positive match + # String, if a name match + #NodeTest + # | ('*' | NCNAME ':' '*' | QNAME) NameTest + # | NODE_TYPE '(' ')' NodeType + # | PI '(' LITERAL ')' PI + # | '[' expr ']' Predicate + NCNAMETEST= /^(#{NCNAME_STR}):\*/u + QNAME = Namespace::NAMESPLIT + NODE_TYPE = /^(comment|text|node)\(\s*\)/m + PI = /^processing-instruction\(/ + def NodeTest path, parsed + case path + when /^\*/ + path = $' + parsed << :any + when NODE_TYPE + type = $1 + path = $' + parsed << type.tr('-', '_').intern + when PI + path = $' + literal = nil + if path !~ /^\s*\)/ + path =~ LITERAL + literal = $1 + path = $' + raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?) + path = path[1..-1] + end + parsed << :processing_instruction + parsed << (literal || '') + when NCNAMETEST + prefix = $1 + path = $' + parsed << :namespace + parsed << prefix + when QNAME + prefix = $1 + name = $2 + path = $' + prefix = "" unless prefix + parsed << :qname + parsed << prefix + parsed << name + end + return path + end + + # Filters the supplied nodeset on the predicate(s) + def Predicate path, parsed + return nil unless path[0] == ?[ + predicates = [] + while path[0] == ?[ + path, expr = get_group(path) + predicates << expr[1..-2] if expr + end + predicates.each{ |pred| + preds = [] + parsed << :predicate + parsed << preds + OrExpr(pred, preds) + } + path + end + + # The following return arrays of true/false, a 1-1 mapping of the + # supplied nodeset, except for axe(), which returns a filtered + # nodeset + + #| OrExpr S 'or' S AndExpr + #| AndExpr + def OrExpr path, parsed + n = [] + rest = AndExpr( path, n ) + if rest != path + while rest =~ /^\s*( or )/ + n = [ :or, n, [] ] + rest = AndExpr( $', n[-1] ) + end + end + if parsed.size == 0 and n.size != 0 + parsed.replace(n) + elsif n.size > 0 + parsed << n + end + rest + end + + #| AndExpr S 'and' S EqualityExpr + #| EqualityExpr + def AndExpr path, parsed + n = [] + rest = EqualityExpr( path, n ) + if rest != path + while rest =~ /^\s*( and )/ + n = [ :and, n, [] ] + rest = EqualityExpr( $', n[-1] ) + end + end + if parsed.size == 0 and n.size != 0 + parsed.replace(n) + elsif n.size > 0 + parsed << n + end + rest + end + + #| EqualityExpr ('=' | '!=') RelationalExpr + #| RelationalExpr + def EqualityExpr path, parsed + n = [] + rest = RelationalExpr( path, n ) + if rest != path + while rest =~ /^\s*(!?=)\s*/ + if $1[0] == ?! + n = [ :neq, n, [] ] + else + n = [ :eq, n, [] ] + end + rest = RelationalExpr( $', n[-1] ) + end + end + if parsed.size == 0 and n.size != 0 + parsed.replace(n) + elsif n.size > 0 + parsed << n + end + rest + end + + #| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr + #| AdditiveExpr + def RelationalExpr path, parsed + n = [] + rest = AdditiveExpr( path, n ) + if rest != path + while rest =~ /^\s*([<>]=?)\s*/ + if $1[0] == ?< + sym = "lt" + else + sym = "gt" + end + sym << "eq" if $1[-1] == ?= + n = [ sym.intern, n, [] ] + rest = AdditiveExpr( $', n[-1] ) + end + end + if parsed.size == 0 and n.size != 0 + parsed.replace(n) + elsif n.size > 0 + parsed << n + end + rest + end + + #| AdditiveExpr ('+' | S '-') MultiplicativeExpr + #| MultiplicativeExpr + def AdditiveExpr path, parsed + n = [] + rest = MultiplicativeExpr( path, n ) + if rest != path + while rest =~ /^\s*(\+| -)\s*/ + if $1[0] == ?+ + n = [ :plus, n, [] ] + else + n = [ :minus, n, [] ] + end + rest = MultiplicativeExpr( $', n[-1] ) + end + end + if parsed.size == 0 and n.size != 0 + parsed.replace(n) + elsif n.size > 0 + parsed << n + end + rest + end + + #| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr + #| UnaryExpr + def MultiplicativeExpr path, parsed + n = [] + rest = UnaryExpr( path, n ) + if rest != path + while rest =~ /^\s*(\*| div | mod )\s*/ + if $1[0] == ?* + n = [ :mult, n, [] ] + elsif $1.include?( "div" ) + n = [ :div, n, [] ] + else + n = [ :mod, n, [] ] + end + rest = UnaryExpr( $', n[-1] ) + end + end + if parsed.size == 0 and n.size != 0 + parsed.replace(n) + elsif n.size > 0 + parsed << n + end + rest + end + + #| '-' UnaryExpr + #| UnionExpr + def UnaryExpr path, parsed + path =~ /^(\-*)/ + path = $' + if $1 and (($1.size % 2) != 0) + mult = -1 + else + mult = 1 + end + parsed << :neg if mult < 0 + + n = [] + path = UnionExpr( path, n ) + parsed.concat( n ) + path + end + + #| UnionExpr '|' PathExpr + #| PathExpr + def UnionExpr path, parsed + n = [] + rest = PathExpr( path, n ) + if rest != path + while rest =~ /^\s*(\|)\s*/ + n = [ :union, n, [] ] + rest = PathExpr( $', n[-1] ) + end + end + if parsed.size == 0 and n.size != 0 + parsed.replace( n ) + elsif n.size > 0 + parsed << n + end + rest + end + + #| LocationPath + #| FilterExpr ('/' | '//') RelativeLocationPath + def PathExpr path, parsed + path =~ /^\s*/ + path = $' + n = [] + rest = FilterExpr( path, n ) + if rest != path + if rest and rest[0] == ?/ + return RelativeLocationPath(rest, n) + end + end + rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w*]/ + parsed.concat(n) + return rest + end + + #| FilterExpr Predicate + #| PrimaryExpr + def FilterExpr path, parsed + n = [] + path = PrimaryExpr( path, n ) + path = Predicate(path, n) if path and path[0] == ?[ + parsed.concat(n) + path + end + + #| VARIABLE_REFERENCE + #| '(' expr ')' + #| LITERAL + #| NUMBER + #| FunctionCall + VARIABLE_REFERENCE = /^\$(#{NAME_STR})/u + NUMBER = /^(\d*\.?\d+)/ + NT = /^comment|text|processing-instruction|node$/ + def PrimaryExpr path, parsed + case path + when VARIABLE_REFERENCE + varname = $1 + path = $' + parsed << :variable + parsed << varname + #arry << @variables[ varname ] + when /^(\w[-\w]*)(?:\()/ + fname = $1 + tmp = $' + return path if fname =~ NT + path = tmp + parsed << :function + parsed << fname + path = FunctionCall(path, parsed) + when NUMBER + varname = $1.nil? ? $2 : $1 + path = $' + parsed << :literal + parsed << (varname.include?('.') ? varname.to_f : varname.to_i) + when LITERAL + varname = $1.nil? ? $2 : $1 + path = $' + parsed << :literal + parsed << varname + when /^\(/ #/ + path, contents = get_group(path) + contents = contents[1..-2] + n = [] + OrExpr( contents, n ) + parsed.concat(n) + end + path + end + + #| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')' + def FunctionCall rest, parsed + path, arguments = parse_args(rest) + argset = [] + for argument in arguments + args = [] + OrExpr( argument, args ) + argset << args + end + parsed << argset + path + end + + # get_group( '[foo]bar' ) -> ['bar', '[foo]'] + def get_group string + ind = 0 + depth = 0 + st = string[0,1] + en = (st == "(" ? ")" : "]") + begin + case string[ind,1] + when st + depth += 1 + when en + depth -= 1 + end + ind += 1 + end while depth > 0 and ind < string.length + return nil unless depth==0 + [string[ind..-1], string[0..ind-1]] + end + + def parse_args( string ) + arguments = [] + ind = 0 + inquot = false + inapos = false + depth = 1 + begin + case string[ind] + when ?" + inquot = !inquot unless inapos + when ?' + inapos = !inapos unless inquot + else + unless inquot or inapos + case string[ind] + when ?( + depth += 1 + if depth == 1 + string = string[1..-1] + ind -= 1 + end + when ?) + depth -= 1 + if depth == 0 + s = string[0,ind].strip + arguments << s unless s == "" + string = string[ind+1..-1] + end + when ?, + if depth == 1 + s = string[0,ind].strip + arguments << s unless s == "" + string = string[ind+1..-1] + ind = -1 + end + end + end + end + ind += 1 + end while depth > 0 and ind < string.length + return nil unless depth==0 + [string,arguments] + end + end + end +end -- cgit v1.2.3