summaryrefslogtreecommitdiff
path: root/jni/ruby/lib/rexml/parsers
diff options
context:
space:
mode:
authorJari Vetoniemi <jari.vetoniemi@indooratlas.com>2020-03-16 18:49:26 +0900
committerJari Vetoniemi <jari.vetoniemi@indooratlas.com>2020-03-30 00:39:06 +0900
commitfcbf63e62c627deae76c1b8cb8c0876c536ed811 (patch)
tree64cb17de3f41a2b6fef2368028fbd00349946994 /jni/ruby/lib/rexml/parsers
Fresh start
Diffstat (limited to 'jni/ruby/lib/rexml/parsers')
-rw-r--r--jni/ruby/lib/rexml/parsers/baseparser.rb532
-rw-r--r--jni/ruby/lib/rexml/parsers/lightparser.rb58
-rw-r--r--jni/ruby/lib/rexml/parsers/pullparser.rb196
-rw-r--r--jni/ruby/lib/rexml/parsers/sax2parser.rb272
-rw-r--r--jni/ruby/lib/rexml/parsers/streamparser.rb52
-rw-r--r--jni/ruby/lib/rexml/parsers/treeparser.rb100
-rw-r--r--jni/ruby/lib/rexml/parsers/ultralightparser.rb56
-rw-r--r--jni/ruby/lib/rexml/parsers/xpathparser.rb656
8 files changed, 1922 insertions, 0 deletions
diff --git a/jni/ruby/lib/rexml/parsers/baseparser.rb b/jni/ruby/lib/rexml/parsers/baseparser.rb
new file mode 100644
index 0000000..6a08b86
--- /dev/null
+++ b/jni/ruby/lib/rexml/parsers/baseparser.rb
@@ -0,0 +1,532 @@
+require 'rexml/parseexception'
+require 'rexml/undefinednamespaceexception'
+require 'rexml/source'
+require 'set'
+
+module REXML
+ module Parsers
+ # = Using the Pull Parser
+ # <em>This API is experimental, and subject to change.</em>
+ # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+ # while parser.has_next?
+ # res = parser.next
+ # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
+ # end
+ # See the PullEvent class for information on the content of the results.
+ # The data is identical to the arguments passed for the various events to
+ # the StreamListener API.
+ #
+ # Notice that:
+ # parser = PullParser.new( "<a>BAD DOCUMENT" )
+ # while parser.has_next?
+ # res = parser.next
+ # raise res[1] if res.error?
+ # end
+ #
+ # Nat Price gave me some good ideas for the API.
+ class BaseParser
+ # Character-class fragments interpolated into the name patterns below.
+ LETTER = '[:alpha:]'
+ DIGIT = '[:digit:]'
+
+ COMBININGCHAR = '' # TODO
+ EXTENDER = '' # TODO
+
+ # NAME_STR captures the optional prefix and the local part in separate
+ # groups; UNAME_STR matches the same text without any capture groups.
+ NCNAME_STR= "[#{LETTER}_:][-[:alnum:]._:#{COMBININGCHAR}#{EXTENDER}]*"
+ NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
+ UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
+
+ NAMECHAR = '[\-\w\.:]'
+ NAME = "([\\w:]#{NAMECHAR}*)"
+ NMTOKEN = "(?:#{NAMECHAR})+"
+ NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
+ REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
+ REFERENCE_RE = /#{REFERENCE}/
+
+ # The *_START patterns are used as `when` tests in pull_event; the
+ # matching *_PATTERN forms are what is actually consumed from the source.
+ DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
+ DOCTYPE_END = /\A\s*\]\s*>/um
+ DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
+ ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
+ COMMENT_START = /\A<!--/u
+ COMMENT_PATTERN = /<!--(.*?)-->/um
+ CDATA_START = /\A<!\[CDATA\[/u
+ # NOTE(review): same regexp as DOCTYPE_END; no use of CDATA_END is visible
+ # in this file.
+ CDATA_END = /\A\s*\]\s*>/um
+ CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
+ XMLDECL_START = /\A<\?xml\s/u;
+ XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
+ INSTRUCTION_START = /\A<\?/u
+ INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
+ # TAG_MATCH groups: 1 = qualified name, 2 = prefix, 3 = local part,
+ # 4 = raw attribute text, 5 = attribute quote, 6 = "/" of an empty tag.
+ TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
+ CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
+
+ # Pseudo-attributes of the XML declaration.
+ VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
+ ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
+ STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
+
+ ENTITY_START = /\A\s*<!ENTITY/
+ IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
+ ELEMENTDECL_START = /\A\s*<!ELEMENT/um
+ ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
+ SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
+ ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
+ NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
+ ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
+ ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
+ ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
+ DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
+ ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
+ ATTDEF_RE = /#{ATTDEF}/
+ ATTLISTDECL_START = /\A\s*<!ATTLIST/um
+ ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
+ NOTATIONDECL_START = /\A\s*<!NOTATION/um
+ # PUBLIC and SYSTEM are the two accepted shapes of a <!NOTATION ...>
+ # declaration; pull_event tries PUBLIC first.
+ PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
+ SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
+
+ TEXT_PATTERN = /\A([^<]*)/um
+
+ # Entity constants
+ PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
+ SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
+ PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
+ EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
+ NDATADECL = "\\s+NDATA\\s+#{NAME}"
+ PEREFERENCE = "%#{NAME};"
+ ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
+ PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
+ ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
+ PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
+ GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
+ ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
+
+ # An "&" that does not start a named reference; normalize escapes these.
+ EREFERENCE = /&(?!#{NAME};)/
+
+ # Each value is [ reference regexp, reference text, literal character,
+ # literal-character regexp ]. normalize reads slots 3 and 1, entity reads
+ # slot 2, and unnormalize reads slots 0 and 2.
+ DEFAULT_ENTITIES = {
+ 'gt' => [/&gt;/, '&gt;', '>', />/],
+ 'lt' => [/&lt;/, '&lt;', '<', /</],
+ 'quot' => [/&quot;/, '&quot;', '"', /"/],
+ "apos" => [/&apos;/, "&apos;", "'", /'/]
+ }
+
+
+ ######################################################################
+ # These are patterns to identify common markup errors, to make the
+ # error messages more informative.
+ ######################################################################
+ MISSING_ATTRIBUTE_QUOTES = /^<#{NAME_STR}\s+#{NAME_STR}\s*=\s*[^"']/um
+
+ def initialize( source )
+ self.stream = source
+ @listeners = []
+ end
+
+ def add_listener( listener )
+ @listeners << listener
+ end
+
+ attr_reader :source
+
+ def stream=( source )
+ @source = SourceFactory.create_from( source )
+ @closed = nil
+ @document_status = nil
+ @tags = []
+ @stack = []
+ @entities = []
+ @nsstack = []
+ end
+
+ def position
+ if @source.respond_to? :position
+ @source.position
+ else
+ # FIXME
+ 0
+ end
+ end
+
+ # Returns true if there are no more events
+ def empty?
+ return (@source.empty? and @stack.empty?)
+ end
+
+ # Returns true if there are more events. Synonymous with !empty?
+ def has_next?
+ return !(@source.empty? and @stack.empty?)
+ end
+
+ # Push an event back on the head of the stream. This method
+ # has (theoretically) infinite depth.
+ def unshift token
+ @stack.unshift(token)
+ end
+
+ # Peek at the +depth+ event in the stack. The first element on the stack
+ # is at depth 0. If +depth+ is -1, will parse to the end of the input
+ # stream and return the last event, which is always :end_document.
+ # Be aware that this causes the stream to be parsed up to the +depth+
+ # event, so you can effectively pre-parse the entire document (pull the
+ # entire thing into memory) using this method.
+ def peek depth=0
+ raise %Q[Illegal argument "#{depth}"] if depth < -1
+ temp = []
+ if depth == -1
+ temp.push(pull()) until empty?
+ else
+ while @stack.size+temp.size < depth+1
+ temp.push(pull())
+ end
+ end
+ @stack += temp if temp.size > 0
+ @stack[depth]
+ end
+
+ # Returns the next event. This is a +PullEvent+ object.
+ def pull
+ pull_event.tap do |event|
+ @listeners.each do |listener|
+ listener.receive event
+ end
+ end
+ end
+
+ # Produces the next raw event as an Array whose first element is the
+ # event Symbol. Sources are tried in this order: a pending end tag left
+ # by an empty-element tag (@closed), :end_document once empty? is true,
+ # events pushed back onto @stack, and finally fresh input from @source.
+ # Raises REXML::ParseException (or UndefinedNamespaceException) on
+ # malformed input.
+ def pull_event
+ if @closed
+ x, @closed = @closed, nil
+ return [ :end_element, x ]
+ end
+ return [ :end_document ] if empty?
+ return @stack.shift if @stack.size > 0
+ #STDERR.puts @source.encoding
+ @source.read if @source.buffer.size<2
+ #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
+ # Prolog: @document_status stays nil until a DOCTYPE or the first
+ # non-declaration content is seen.
+ if @document_status == nil
+ #@source.consume( /^\s*/um )
+ word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
+ word = word[1] unless word.nil?
+ #STDERR.puts "WORD = #{word.inspect}"
+ case word
+ when COMMENT_START
+ return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
+ when XMLDECL_START
+ #STDERR.puts "XMLDECL"
+ results = @source.match( XMLDECL_PATTERN, true )[1]
+ version = VERSION.match( results )
+ version = version[1] unless version.nil?
+ encoding = ENCODING.match(results)
+ encoding = encoding[1] unless encoding.nil?
+ if need_source_encoding_update?(encoding)
+ @source.encoding = encoding
+ end
+ # No declared encoding but a UTF-16 source: report it as "UTF-16".
+ if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
+ encoding = "UTF-16"
+ end
+ standalone = STANDALONE.match(results)
+ standalone = standalone[1] unless standalone.nil?
+ return [ :xmldecl, version, encoding, standalone ]
+ when INSTRUCTION_START
+ return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
+ when DOCTYPE_START
+ md = @source.match( DOCTYPE_PATTERN, true )
+ @nsstack.unshift(curr_ns=Set.new)
+ identity = md[1]
+ close = md[2]
+ identity =~ IDENTITY
+ name = $1
+ raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
+ pub_sys = $2.nil? ? nil : $2.strip
+ long_name = $4.nil? ? nil : $4.strip
+ uri = $6.nil? ? nil : $6.strip
+ args = [ :start_doctype, name, pub_sys, long_name, uri ]
+ if close == ">"
+ # DOCTYPE without an internal subset: queue the matching
+ # :end_doctype so it is returned by the next call.
+ @document_status = :after_doctype
+ @source.read if @source.buffer.size<2
+ md = @source.match(/^\s*/um, true)
+ @stack << [ :end_doctype ]
+ else
+ @document_status = :in_doctype
+ end
+ return args
+ when /^\s+/
+ # Leading whitespace: nothing is consumed here and the status is
+ # unchanged; control falls through to the content parsing below.
+ else
+ @document_status = :after_doctype
+ @source.read if @source.buffer.size<2
+ md = @source.match(/\s*/um, true)
+ if @source.encoding == "UTF-8"
+ @source.buffer.force_encoding(::Encoding::UTF_8)
+ end
+ end
+ end
+ # Internal DTD subset: declarations between "[" and "]>".
+ if @document_status == :in_doctype
+ # NOTE(review): md is dereferenced without a nil check; if no ">"
+ # remains in the buffer this raises NoMethodError outside the
+ # begin/rescue below.
+ md = @source.match(/\s*(.*?>)/um)
+ case md[1]
+ when SYSTEMENTITY
+ match = @source.match( SYSTEMENTITY, true )[1]
+ return [ :externalentity, match ]
+
+ when ELEMENTDECL_START
+ return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
+
+ when ENTITY_START
+ match = @source.match( ENTITYDECL, true ).to_a.compact
+ match[0] = :entitydecl
+ ref = false
+ if match[1] == '%'
+ ref = true
+ match.delete_at 1
+ end
+ # Now we have to sort out what kind of entity reference this is
+ if match[2] == 'SYSTEM'
+ # External reference
+ match[3] = match[3][1..-2] # PUBID
+ match.delete_at(4) if match.size > 4 # Chop out NDATA decl
+ # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
+ elsif match[2] == 'PUBLIC'
+ # External reference
+ match[3] = match[3][1..-2] # PUBID
+ match[4] = match[4][1..-2] # HREF
+ match.delete_at(5) if match.size > 5 # Chop out NDATA decl
+ # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
+ else
+ match[2] = match[2][1..-2]
+ match.pop if match.size == 4
+ # match is [ :entity, name, value ]
+ end
+ # A parameter entity is flagged by a trailing '%' element.
+ match << '%' if ref
+ return match
+ when ATTLISTDECL_START
+ md = @source.match( ATTLISTDECL_PATTERN, true )
+ raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
+ element = md[1]
+ contents = md[0]
+
+ pairs = {}
+ values = md[0].scan( ATTDEF_RE )
+ values.each do |attdef|
+ unless attdef[3] == "#IMPLIED"
+ attdef.compact!
+ val = attdef[3]
+ val = attdef[4] if val == "#FIXED "
+ pairs[attdef[0]] = val
+ # Attribute defaults can declare namespace prefixes too.
+ if attdef[0] =~ /^xmlns:(.*)/
+ @nsstack[0] << $1
+ end
+ end
+ end
+ return [ :attlistdecl, element, pairs, contents ]
+ when NOTATIONDECL_START
+ md = nil
+ if @source.match( PUBLIC )
+ md = @source.match( PUBLIC, true )
+ vals = [md[1],md[2],md[4],md[6]]
+ elsif @source.match( SYSTEM )
+ md = @source.match( SYSTEM, true )
+ vals = [md[1],md[2],nil,md[4]]
+ else
+ raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
+ end
+ return [ :notationdecl, *vals ]
+ when DOCTYPE_END
+ @document_status = :after_doctype
+ @source.match( DOCTYPE_END, true )
+ return [ :end_doctype ]
+ end
+ end
+ # Document content: tags, comments, CDATA, instructions and text.
+ begin
+ if @source.buffer[0] == ?<
+ if @source.buffer[1] == ?/
+ # Close tag: must match the most recently opened element.
+ @nsstack.shift
+ last_tag = @tags.pop
+ #md = @source.match_to_consume( '>', CLOSE_MATCH)
+ md = @source.match( CLOSE_MATCH, true )
+ raise REXML::ParseException.new( "Missing end tag for "+
+ "'#{last_tag}' (got \"#{md[1]}\")",
+ @source) unless last_tag == md[1]
+ return [ :end_element, last_tag ]
+ elsif @source.buffer[1] == ?!
+ md = @source.match(/\A(\s*[^>]*>)/um)
+ #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
+ raise REXML::ParseException.new("Malformed node", @source) unless md
+ if md[0][2] == ?-
+ md = @source.match( COMMENT_PATTERN, true )
+
+ # NOTE(review): md[1] is read before the `if md` guard below; a
+ # failed match raises NoMethodError here, which the rescue at the
+ # end of this method wraps as "Exception parsing".
+ case md[1]
+ when /--/, /-\z/
+ raise REXML::ParseException.new("Malformed comment", @source)
+ end
+
+ return [ :comment, md[1] ] if md
+ else
+ md = @source.match( CDATA_PATTERN, true )
+ return [ :cdata, md[1] ] if md
+ end
+ raise REXML::ParseException.new( "Declarations can only occur "+
+ "in the doctype declaration.", @source)
+ elsif @source.buffer[1] == ??
+ md = @source.match( INSTRUCTION_PATTERN, true )
+ return [ :processing_instruction, md[1], md[2] ] if md
+ raise REXML::ParseException.new( "Bad instruction declaration",
+ @source)
+ else
+ # Get the next tag
+ md = @source.match(TAG_MATCH, true)
+ unless md
+ # Check for missing attribute quotes
+ raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
+ raise REXML::ParseException.new("malformed XML: missing tag start", @source)
+ end
+ # md[1] = qualified name, md[2] = prefix, md[4] = raw attribute
+ # text, md[6] = "/" when the tag is self-closing.
+ attributes = {}
+ prefixes = Set.new
+ prefixes << md[2] if md[2]
+ @nsstack.unshift(curr_ns=Set.new)
+ if md[4].size > 0
+ attrs = md[4].scan( ATTRIBUTE_PATTERN )
+ raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
+ attrs.each do |attr_name, prefix, local_part, quote, value|
+ if prefix == "xmlns"
+ if local_part == "xml"
+ if value != "http://www.w3.org/XML/1998/namespace"
+ msg = "The 'xml' prefix must not be bound to any other namespace "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self )
+ end
+ elsif local_part == "xmlns"
+ msg = "The 'xmlns' prefix must not be declared "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self)
+ end
+ curr_ns << local_part
+ elsif prefix
+ prefixes << prefix unless prefix == "xml"
+ end
+
+ if attributes.has_key?(attr_name)
+ msg = "Duplicate attribute #{attr_name.inspect}"
+ raise REXML::ParseException.new(msg, @source, self)
+ end
+
+ attributes[attr_name] = value
+ end
+ end
+
+ # Verify that all of the prefixes have been defined
+ for prefix in prefixes
+ unless @nsstack.find{|k| k.member?(prefix)}
+ raise UndefinedNamespaceException.new(prefix,@source,self)
+ end
+ end
+
+ if md[6]
+ # Empty-element tag: remember the name so the next call emits the
+ # matching :end_element, and drop its namespace scope right away.
+ @closed = md[1]
+ @nsstack.shift
+ else
+ @tags.push( md[1] )
+ end
+ return [ :start_element, md[1], attributes ]
+ end
+ else
+ md = @source.match( TEXT_PATTERN, true )
+ if md[0].length == 0
+ @source.match( /(\s+)/, true )
+ end
+ #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
+ #return [ :text, "" ] if md[0].length == 0
+ # unnormalized = Text::unnormalize( md[1], self )
+ # return PullEvent.new( :text, md[1], unnormalized )
+ return [ :text, md[1] ]
+ end
+ rescue REXML::UndefinedNamespaceException
+ raise
+ rescue REXML::ParseException
+ raise
+ # NOTE(review): rescuing Exception also wraps interrupts and exit
+ # requests raised while parsing content into a ParseException.
+ rescue Exception, NameError => error
+ raise REXML::ParseException.new( "Exception parsing",
+ @source, self, (error ? error : $!) )
+ end
+ # NOTE(review): every path through the begin block above appears to
+ # return or raise, so this line looks unreachable.
+ return [ :dummy ]
+ end
+ private :pull_event
+
+ def entity( reference, entities )
+ value = nil
+ value = entities[ reference ] if entities
+ if not value
+ value = DEFAULT_ENTITIES[ reference ]
+ value = value[2] if value
+ end
+ unnormalize( value, entities ) if value
+ end
+
+ # Escapes all possible entities
+ def normalize( input, entities=nil, entity_filter=nil )
+ copy = input.clone
+ # Doing it like this rather than in a loop improves the speed
+ copy.gsub!( EREFERENCE, '&amp;' )
+ entities.each do |key, value|
+ copy.gsub!( value, "&#{key};" ) unless entity_filter and
+ entity_filter.include?(entity)
+ end if entities
+ copy.gsub!( EREFERENCE, '&amp;' )
+ DEFAULT_ENTITIES.each do |key, value|
+ copy.gsub!( value[3], value[1] )
+ end
+ copy
+ end
+
+ # Unescapes all possible entities
+ def unnormalize( string, entities=nil, filter=nil )
+ rv = string.clone
+ rv.gsub!( /\r\n?/, "\n" )
+ matches = rv.scan( REFERENCE_RE )
+ return rv if matches.size == 0
+ rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
+ m=$1
+ m = "0#{m}" if m[0] == ?x
+ [Integer(m)].pack('U*')
+ }
+ matches.collect!{|x|x[0]}.compact!
+ if matches.size > 0
+ matches.each do |entity_reference|
+ unless filter and filter.include?(entity_reference)
+ entity_value = entity( entity_reference, entities )
+ if entity_value
+ re = /&#{entity_reference};/
+ rv.gsub!( re, entity_value )
+ else
+ er = DEFAULT_ENTITIES[entity_reference]
+ rv.gsub!( er[0], er[2] ) if er
+ end
+ end
+ end
+ rv.gsub!( /&amp;/, '&' )
+ end
+ rv
+ end
+
+ private
+ def need_source_encoding_update?(xml_declaration_encoding)
+ return false if xml_declaration_encoding.nil?
+ return false if /\AUTF-16\z/i =~ xml_declaration_encoding
+ true
+ end
+ end
+ end
+end
+
+=begin
+ case event[0]
+ when :start_element
+ when :text
+ when :end_element
+ when :processing_instruction
+ when :cdata
+ when :comment
+ when :xmldecl
+ when :start_doctype
+ when :end_doctype
+ when :externalentity
+ when :elementdecl
+ when :entitydecl
+ when :attlistdecl
+ when :notationdecl
+ when :end_document
+ end
+=end
diff --git a/jni/ruby/lib/rexml/parsers/lightparser.rb b/jni/ruby/lib/rexml/parsers/lightparser.rb
new file mode 100644
index 0000000..8104168
--- /dev/null
+++ b/jni/ruby/lib/rexml/parsers/lightparser.rb
@@ -0,0 +1,58 @@
+require 'rexml/parsers/streamparser'
+require 'rexml/parsers/baseparser'
+require 'rexml/light/node'
+
+module REXML
+ module Parsers
+ class LightParser
+ def initialize stream
+ @stream = stream
+ @parser = REXML::Parsers::BaseParser.new( stream )
+ end
+
+ def add_listener( listener )
+ @parser.add_listener( listener )
+ end
+
+ def rewind
+ @stream.rewind
+ @parser.stream = @stream
+ end
+
+ def parse
+ root = context = [ :document ]
+ while true
+ event = @parser.pull
+ case event[0]
+ when :end_document
+ break
+ when :start_element, :start_doctype
+ new_node = event
+ context << new_node
+ new_node[1,0] = [context]
+ context = new_node
+ when :end_element, :end_doctype
+ context = context[1]
+ else
+ new_node = event
+ context << new_node
+ new_node[1,0] = [context]
+ end
+ end
+ root
+ end
+ end
+
+ # An element is an array. The array contains:
+ # 0 The parent element
+ # 1 The tag name
+ # 2 A hash of attributes
+ # 3..-1 The child elements
+ # An element is an array of size > 3
+ # Text is a String
+ # PIs are [ :processing_instruction, target, data ]
+ # Comments are [ :comment, data ]
+ # DocTypes are DocType structs
+ # The root is an array with XMLDecls, Text, DocType, Array, Text
+ end
+end
diff --git a/jni/ruby/lib/rexml/parsers/pullparser.rb b/jni/ruby/lib/rexml/parsers/pullparser.rb
new file mode 100644
index 0000000..68a4ff7
--- /dev/null
+++ b/jni/ruby/lib/rexml/parsers/pullparser.rb
@@ -0,0 +1,196 @@
+require 'forwardable'
+
+require 'rexml/parseexception'
+require 'rexml/parsers/baseparser'
+require 'rexml/xmltokens'
+
+module REXML
+ module Parsers
+ # = Using the Pull Parser
+ # <em>This API is experimental, and subject to change.</em>
+ # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+ # while parser.has_next?
+ # res = parser.pull
+ # puts res[1]['att'] if res.start_element? and res[0] == 'b'
+ # end
+ # See the PullEvent class for information on the content of the results.
+ # The data is identical to the arguments passed for the various events to
+ # the StreamListener API.
+ #
+ # Notice that:
+ # parser = PullParser.new( "<a>BAD DOCUMENT" )
+ # while parser.has_next?
+ # res = parser.pull
+ # raise res[1] if res.error?
+ # end
+ #
+ # Nat Price gave me some good ideas for the API.
+ class PullParser
+ include XMLTokens
+ extend Forwardable
+
+ def_delegators( :@parser, :has_next? )
+ def_delegators( :@parser, :entity )
+ def_delegators( :@parser, :empty? )
+ def_delegators( :@parser, :source )
+
+ def initialize stream
+ @entities = {}
+ @listeners = nil
+ @parser = BaseParser.new( stream )
+ @my_stack = []
+ end
+
+ def add_listener( listener )
+ @listeners = [] unless @listeners
+ @listeners << listener
+ end
+
+ def each
+ while has_next?
+ yield self.pull
+ end
+ end
+
+ def peek depth=0
+ if @my_stack.length <= depth
+ (depth - @my_stack.length + 1).times {
+ e = PullEvent.new(@parser.pull)
+ @my_stack.push(e)
+ }
+ end
+ @my_stack[depth]
+ end
+
+ def pull
+ return @my_stack.shift if @my_stack.length > 0
+
+ event = @parser.pull
+ case event[0]
+ when :entitydecl
+ @entities[ event[1] ] =
+ event[2] unless event[2] =~ /PUBLIC|SYSTEM/
+ when :text
+ unnormalized = @parser.unnormalize( event[1], @entities )
+ event << unnormalized
+ end
+ PullEvent.new( event )
+ end
+
+ def unshift token
+ @my_stack.unshift token
+ end
+ end
+
+ # A parsing event. The contents of the event are accessed as an +Array+,
+ # and the type is given either by the ...? methods, or by accessing the
+ # +type+ accessor. The contents of this object vary from event to event,
+ # but are identical to the arguments passed to +StreamListener+s for each
+ # event.
+ class PullEvent
+   # arg:: the raw event Array; element 0 is the event type Symbol and the
+   #       remaining elements are the event's content.
+   def initialize(arg)
+     @contents = arg
+   end
+
+   # Array-style access to the content of the event, i.e. everything after
+   # the type Symbol: index 0 is the first content element.
+   #
+   # start:: an Integer index or a Range of content indexes. Negative
+   #         values count from the end, as with Array.
+   # endd::  optional length, used only with an Integer +start+.
+   #
+   # Raises a RuntimeError for any other kind of +start+.
+   def []( start, endd=nil)
+     if start.kind_of? Range
+       # Both ends are shifted past the type slot. Previously only the
+       # start was shifted, so an inclusive range such as 0..1 came back
+       # one element short.
+       first = start.begin.nil? ? 1 : content_index( start.begin )
+       last = content_index( start.end )
+       @contents.slice( Range.new( first, last, start.exclude_end? ) )
+     elsif start.kind_of? Numeric
+       if endd.nil?
+         @contents.slice( content_index( start ) )
+       else
+         @contents.slice( content_index( start ), endd )
+       end
+     else
+       raise "Illegal argument #{start.inspect} (#{start.class})"
+     end
+   end
+
+   # The type of this event, as tested by the predicates below:
+   # :start_element, :end_element, :text, :processing_instruction,
+   # :comment, :start_doctype, :attlistdecl, :elementdecl, :entitydecl,
+   # :notationdecl, :entity, :cdata, :xmldecl, or :error.
+   def event_type
+     @contents[0]
+   end
+
+   # Content: [ String tag_name, Hash attributes ]
+   def start_element?
+     @contents[0] == :start_element
+   end
+
+   # Content: [ String tag_name ]
+   def end_element?
+     @contents[0] == :end_element
+   end
+
+   # Content: [ String raw_text, String unnormalized_text ]
+   def text?
+     @contents[0] == :text
+   end
+
+   # Content: [ String target, String content ]
+   def instruction?
+     @contents[0] == :processing_instruction
+   end
+
+   # Content: [ String text ]
+   def comment?
+     @contents[0] == :comment
+   end
+
+   # Content: [ String name, String pub_sys, String long_name, String uri ]
+   def doctype?
+     @contents[0] == :start_doctype
+   end
+
+   # Content: [ String element_name, Hash attribute_defaults, String raw_text ]
+   def attlistdecl?
+     @contents[0] == :attlistdecl
+   end
+
+   # Content: [ String text ]
+   def elementdecl?
+     @contents[0] == :elementdecl
+   end
+
+   # Due to the wonders of DTDs, an entity declaration can be just about
+   # anything. There's no way to normalize it; you'll have to interpret the
+   # content yourself. However, the following is true:
+   #
+   # * If the entity declaration is an internal entity:
+   #   [ String name, String value ]
+   def entitydecl?
+     @contents[0] == :entitydecl
+   end
+
+   # Content: [ String name, String "PUBLIC" or "SYSTEM",
+   #            String public_id (nil for SYSTEM), String system_id ]
+   def notationdecl?
+     @contents[0] == :notationdecl
+   end
+
+   # Content: [ String text ]
+   # NOTE(review): BaseParser#pull_event does not emit an :entity event.
+   def entity?
+     @contents[0] == :entity
+   end
+
+   # Content: [ String text ]
+   def cdata?
+     @contents[0] == :cdata
+   end
+
+   # Content: [ String version, String encoding, String standalone ]
+   def xmldecl?
+     @contents[0] == :xmldecl
+   end
+
+   # NOTE(review): BaseParser#pull_event raises on errors instead of
+   # emitting an :error event.
+   def error?
+     @contents[0] == :error
+   end
+
+   # Human-readable form: "<type>: <content array>".
+   def inspect
+     "#{@contents[0]}: #{@contents[1..-1].inspect}"
+   end
+
+   private
+
+   # Maps an index into the content onto an index into @contents, which has
+   # the type Symbol at position 0. Negative indexes already count from the
+   # end of the content and nil (an open range end) is passed through.
+   def content_index( index )
+     return index if index.nil? or index < 0
+     index + 1
+   end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/parsers/sax2parser.rb b/jni/ruby/lib/rexml/parsers/sax2parser.rb
new file mode 100644
index 0000000..a72c0a7
--- /dev/null
+++ b/jni/ruby/lib/rexml/parsers/sax2parser.rb
@@ -0,0 +1,272 @@
+require 'rexml/parsers/baseparser'
+require 'rexml/parseexception'
+require 'rexml/namespace'
+require 'rexml/text'
+
+module REXML
+ module Parsers
+ # SAX2Parser
+ class SAX2Parser
+ def initialize source
+ @parser = BaseParser.new(source)
+ @listeners = []
+ @procs = []
+ @namespace_stack = []
+ @has_listeners = false
+ @tag_stack = []
+ @entities = {}
+ end
+
+ # Returns the source object of the underlying BaseParser.
+ def source
+ @parser.source
+ end
+
+ # Registers +listener+ on the underlying BaseParser (not on this parser's
+ # own observer lists; use #listen for those).
+ def add_listener( listener )
+ @parser.add_listener( listener )
+ end
+
+ # Listen arguments:
+ #
+ # Symbol, Array, Block
+ # Listen to Symbol events on Array elements
+ # Symbol, Block
+ # Listen to Symbol events
+ # Array, Listener
+ # Listen to all events on Array elements
+ # Array, Block
+ # Listen to :start_element events on Array elements
+ # Listener
+ # Listen to All events
+ #
+ # Symbol can be one of: :start_element, :end_element,
+ # :start_prefix_mapping, :end_prefix_mapping, :characters,
+ # :processing_instruction, :doctype, :attlistdecl, :elementdecl,
+ # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
+ #
+ # There is an additional symbol that can be listened for: :progress.
+ # This will be called for every event generated, passing in the current
+ # stream position.
+ #
+ # Array contains regular expressions or strings which will be matched
+ # against fully qualified element names.
+ #
+ # Listener must implement the methods in SAX2Listener
+ #
+ # Block will be passed the same arguments as a SAX2Listener method would
+ # be, where the method name is the same as the matched Symbol.
+ # See the SAX2Listener for more information.
+ def listen( *args, &blok )
+ if args[0].kind_of? Symbol
+ if args.size == 2
+ args[1].each { |match| @procs << [args[0], match, blok] }
+ else
+ add( [args[0], nil, blok] )
+ end
+ elsif args[0].kind_of? Array
+ if args.size == 2
+ args[0].each { |match| add( [nil, match, args[1]] ) }
+ else
+ args[0].each { |match| add( [ :start_element, match, blok ] ) }
+ end
+ else
+ add([nil, nil, args[0]])
+ end
+ end
+
+ def deafen( listener=nil, &blok )
+ if listener
+ @listeners.delete_if {|item| item[-1] == listener }
+ @has_listeners = false if @listeners.size == 0
+ else
+ @procs.delete_if {|item| item[-1] == blok }
+ end
+ end
+
+ # Runs the parse: fires start_document, then pulls events from the
+ # BaseParser until :end_document, dispatching each one to the matching
+ # procs and listeners. A :progress event carrying the parser position is
+ # fired after every event except :end_document.
+ def parse
+ @procs.each { |sym,match,block| block.call if sym == :start_document }
+ @listeners.each { |sym,match,block|
+ block.start_document if sym == :start_document or sym.nil?
+ }
+ # NOTE(review): context is only touched in the :end_doctype branch and
+ # its value is never read in this method.
+ context = []
+ while true
+ event = @parser.pull
+ case event[0]
+ when :end_document
+ handle( :end_document )
+ break
+ when :start_doctype
+ handle( :doctype, *event[1..-1])
+ when :end_doctype
+ context = context[1]
+ when :start_element
+ @tag_stack.push(event[1])
+ # find the observers for namespaces
+ procs = get_procs( :start_prefix_mapping, event[1] )
+ listeners = get_listeners( :start_prefix_mapping, event[1] )
+ if procs or listeners
+ # break out the namespace declarations
+ # The attributes live in event[2]
+ event[2].each {|n, v| event[2][n] = @parser.normalize(v)}
+ nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
+ # Strip the "xmlns:" prefix; a bare "xmlns" yields a nil key, which
+ # is how the default namespace is stored.
+ nsdecl.collect! { |n, value| [ n[6..-1], value ] }
+ @namespace_stack.push({})
+ nsdecl.each do |n,v|
+ @namespace_stack[-1][n] = v
+ # notify observers of namespaces
+ procs.each { |ob| ob.call( n, v ) } if procs
+ listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
+ end
+ end
+ event[1] =~ Namespace::NAMESPLIT
+ prefix = $1
+ local = $2
+ uri = get_namespace(prefix)
+ # find the observers for start_element
+ procs = get_procs( :start_element, event[1] )
+ listeners = get_listeners( :start_element, event[1] )
+ # notify observers
+ procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
+ listeners.each { |ob|
+ ob.start_element( uri, local, event[1], event[2] )
+ } if listeners
+ when :end_element
+ @tag_stack.pop
+ event[1] =~ Namespace::NAMESPLIT
+ prefix = $1
+ local = $2
+ uri = get_namespace(prefix)
+ # find the observers for end_element
+ procs = get_procs( :end_element, event[1] )
+ listeners = get_listeners( :end_element, event[1] )
+ # notify observers
+ procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
+ listeners.each { |ob|
+ ob.end_element( uri, local, event[1] )
+ } if listeners
+
+ namespace_mapping = @namespace_stack.pop
+ # find the observers for namespaces
+ procs = get_procs( :end_prefix_mapping, event[1] )
+ listeners = get_listeners( :end_prefix_mapping, event[1] )
+ if procs or listeners
+ namespace_mapping.each do |ns_prefix, ns_uri|
+ # notify observers of namespaces
+ procs.each { |ob| ob.call( ns_prefix ) } if procs
+ listeners.each { |ob| ob.end_prefix_mapping(ns_prefix) } if listeners
+ end
+ end
+ when :text
+ #normalized = @parser.normalize( event[1] )
+ #handle( :characters, normalized )
+ copy = event[1].clone
+
+ # Expands references to entities recorded by handle_entitydecl,
+ # recursing into each replacement; unknown references are left as is.
+ esub = proc { |match|
+ if @entities.has_key?($1)
+ @entities[$1].gsub(Text::REFERENCE, &esub)
+ else
+ match
+ end
+ }
+
+ copy.gsub!( Text::REFERENCE, &esub )
+ # Decode numeric character references.
+ copy.gsub!( Text::NUMERICENTITY ) {|m|
+ m=$1
+ m = "0#{m}" if m[0] == ?x
+ [Integer(m)].pack('U*')
+ }
+ handle( :characters, copy )
+ when :entitydecl
+ handle_entitydecl( event )
+ when :processing_instruction, :comment, :attlistdecl,
+ :elementdecl, :cdata, :notationdecl, :xmldecl
+ handle( *event )
+ end
+ handle( :progress, @parser.position )
+ end
+ end
+
+ private
+ def handle( symbol, *arguments )
+ tag = @tag_stack[-1]
+ procs = get_procs( symbol, tag )
+ listeners = get_listeners( symbol, tag )
+ # notify observers
+ procs.each { |ob| ob.call( *arguments ) } if procs
+ listeners.each { |l|
+ l.send( symbol.to_s, *arguments )
+ } if listeners
+ end
+
+ def handle_entitydecl( event )
+ @entities[ event[1] ] = event[2] if event.size == 3
+ parameter_reference_p = false
+ case event[2]
+ when "SYSTEM"
+ if event.size == 5
+ if event.last == "%"
+ parameter_reference_p = true
+ else
+ event[4, 0] = "NDATA"
+ end
+ end
+ when "PUBLIC"
+ if event.size == 6
+ if event.last == "%"
+ parameter_reference_p = true
+ else
+ event[5, 0] = "NDATA"
+ end
+ end
+ else
+ parameter_reference_p = (event.size == 4)
+ end
+ event[1, 0] = event.pop if parameter_reference_p
+ handle( event[0], event[1..-1] )
+ end
+
+ # The following methods are duplicates, but it is faster than using
+ # a helper
+ def get_procs( symbol, name )
+ return nil if @procs.size == 0
+ @procs.find_all do |sym, match, block|
+ (
+ (sym.nil? or symbol == sym) and
+ ((name.nil? and match.nil?) or match.nil? or (
+ (name == match) or
+ (match.kind_of? Regexp and name =~ match)
+ )
+ )
+ )
+ end.collect{|x| x[-1]}
+ end
+ def get_listeners( symbol, name )
+ return nil if @listeners.size == 0
+ @listeners.find_all do |sym, match, block|
+ (
+ (sym.nil? or symbol == sym) and
+ ((name.nil? and match.nil?) or match.nil? or (
+ (name == match) or
+ (match.kind_of? Regexp and name =~ match)
+ )
+ )
+ )
+ end.collect{|x| x[-1]}
+ end
+
+ def add( pair )
+ if pair[-1].respond_to? :call
+ @procs << pair unless @procs.include? pair
+ else
+ @listeners << pair unless @listeners.include? pair
+ @has_listeners = true
+ end
+ end
+
+ def get_namespace( prefix )
+ uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
+ (@namespace_stack.find { |ns| not ns[nil].nil? })
+ uris[-1][prefix] unless uris.nil? or 0 == uris.size
+ end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/parsers/streamparser.rb b/jni/ruby/lib/rexml/parsers/streamparser.rb
new file mode 100644
index 0000000..9ea65ed
--- /dev/null
+++ b/jni/ruby/lib/rexml/parsers/streamparser.rb
@@ -0,0 +1,52 @@
+require "rexml/parsers/baseparser"
+
+module REXML
+ module Parsers
+ class StreamParser
+ def initialize source, listener
+ @listener = listener
+ @parser = BaseParser.new( source )
+ end
+
+ def add_listener( listener )
+ @parser.add_listener( listener )
+ end
+
+ def parse
+ # entity string
+ while true
+ event = @parser.pull
+ case event[0]
+ when :end_document
+ return
+ when :start_element
+ attrs = event[2].each do |n, v|
+ event[2][n] = @parser.unnormalize( v )
+ end
+ @listener.tag_start( event[1], attrs )
+ when :end_element
+ @listener.tag_end( event[1] )
+ when :text
+ normalized = @parser.unnormalize( event[1] )
+ @listener.text( normalized )
+ when :processing_instruction
+ @listener.instruction( *event[1,2] )
+ when :start_doctype
+ @listener.doctype( *event[1..-1] )
+ when :end_doctype
+ # FIXME: remove this condition for milestone:3.2
+ @listener.doctype_end if @listener.respond_to? :doctype_end
+ when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
+ @listener.send( event[0].to_s, *event[1..-1] )
+ when :entitydecl, :notationdecl
+ @listener.send( event[0].to_s, event[1..-1] )
+ when :externalentity
+ entity_reference = event[1]
+ content = entity_reference.gsub(/\A%|;\z/, "")
+ @listener.entity(content)
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/parsers/treeparser.rb b/jni/ruby/lib/rexml/parsers/treeparser.rb
new file mode 100644
index 0000000..68edb77
--- /dev/null
+++ b/jni/ruby/lib/rexml/parsers/treeparser.rb
@@ -0,0 +1,100 @@
+require 'rexml/validation/validationexception'
+require 'rexml/undefinednamespaceexception'
+
+module REXML
+ module Parsers
+ class TreeParser
+ def initialize( source, build_context = Document.new )
+ @build_context = build_context
+ @parser = Parsers::BaseParser.new( source )
+ end
+
+ def add_listener( listener )
+ @parser.add_listener( listener )
+ end
+
+ def parse
+ tag_stack = []
+ in_doctype = false
+ entities = nil
+ begin
+ while true
+ event = @parser.pull
+ #STDERR.puts "TREEPARSER GOT #{event.inspect}"
+ case event[0]
+ when :end_document
+ unless tag_stack.empty?
+ raise ParseException.new("No close tag for #{@build_context.xpath}",
+ @parser.source, @parser)
+ end
+ return
+ when :start_element
+ tag_stack.push(event[1])
+ el = @build_context = @build_context.add_element( event[1] )
+ event[2].each do |key, value|
+ el.attributes[key]=Attribute.new(key,value,self)
+ end
+ when :end_element
+ tag_stack.pop
+ @build_context = @build_context.parent
+ when :text
+ if not in_doctype
+ if @build_context[-1].instance_of? Text
+ @build_context[-1] << event[1]
+ else
+ @build_context.add(
+ Text.new(event[1], @build_context.whitespace, nil, true)
+ ) unless (
+ @build_context.ignore_whitespace_nodes and
+ event[1].strip.size==0
+ )
+ end
+ end
+ when :comment
+ c = Comment.new( event[1] )
+ @build_context.add( c )
+ when :cdata
+ c = CData.new( event[1] )
+ @build_context.add( c )
+ when :processing_instruction
+ @build_context.add( Instruction.new( event[1], event[2] ) )
+ when :end_doctype
+ in_doctype = false
+ entities.each { |k,v| entities[k] = @build_context.entities[k].value }
+ @build_context = @build_context.parent
+ when :start_doctype
+ doctype = DocType.new( event[1..-1], @build_context )
+ @build_context = doctype
+ entities = {}
+ in_doctype = true
+ when :attlistdecl
+ n = AttlistDecl.new( event[1..-1] )
+ @build_context.add( n )
+ when :externalentity
+ n = ExternalEntity.new( event[1] )
+ @build_context.add( n )
+ when :elementdecl
+ n = ElementDecl.new( event[1] )
+ @build_context.add(n)
+ when :entitydecl
+ entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
+ @build_context.add(Entity.new(event))
+ when :notationdecl
+ n = NotationDecl.new( *event[1..-1] )
+ @build_context.add( n )
+ when :xmldecl
+ x = XMLDecl.new( event[1], event[2], event[3] )
+ @build_context.add( x )
+ end
+ end
+ rescue REXML::Validation::ValidationException
+ raise
+ rescue REXML::ParseException
+ raise
+ rescue
+ raise ParseException.new( $!.message, @parser.source, @parser, $! )
+ end
+ end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/parsers/ultralightparser.rb b/jni/ruby/lib/rexml/parsers/ultralightparser.rb
new file mode 100644
index 0000000..4e2d7a8
--- /dev/null
+++ b/jni/ruby/lib/rexml/parsers/ultralightparser.rb
@@ -0,0 +1,56 @@
+require 'rexml/parsers/streamparser'
+require 'rexml/parsers/baseparser'
+
+module REXML
+ module Parsers
+ class UltraLightParser
+ def initialize stream
+ @stream = stream
+ @parser = REXML::Parsers::BaseParser.new( stream )
+ end
+
+ def add_listener( listener )
+ @parser.add_listener( listener )
+ end
+
+ def rewind
+ @stream.rewind
+ @parser.stream = @stream
+ end
+
+ def parse
+ root = context = []
+ while true
+ event = @parser.pull
+ case event[0]
+ when :end_document
+ break
+ when :end_doctype
+ context = context[1]
+ when :start_element, :start_doctype
+ context << event
+ event[1,0] = [context]
+ context = event
+ when :end_element
+ context = context[1]
+ else
+ context << event
+ end
+ end
+ root
+ end
+ end
+
+ # An element is an array. The array contains:
+ # 0 The event type (:start_element)
+ # 1 The parent array
+ # 2 The tag name
+ # 3 A hash of attributes
+ # 4..-1 The child nodes
+ # An element is an array of size > 3
+ # Text is [ :text, string ]
+ # PIs are [ :processing_instruction, target, data ]
+ # Comments are [ :comment, data ]
+ # DocTypes are [ :start_doctype, parent, ... ] arrays, nested like elements
+ # The root is a plain array holding the top-level events in document order
+ end
+end
diff --git a/jni/ruby/lib/rexml/parsers/xpathparser.rb b/jni/ruby/lib/rexml/parsers/xpathparser.rb
new file mode 100644
index 0000000..57767fb
--- /dev/null
+++ b/jni/ruby/lib/rexml/parsers/xpathparser.rb
@@ -0,0 +1,656 @@
+require 'rexml/namespace'
+require 'rexml/xmltokens'
+
+module REXML
+ module Parsers
+ # You don't want to use this class. Really. Use XPath, which is a wrapper
+ # for this class. Believe me. You don't want to poke around in here.
+ # There is strange, dark magic at work in this code. Beware. Go back! Go
+ # back while you still can!
+ class XPathParser
+ include XMLTokens
+ LITERAL = /^'([^']*)'|^"([^"]*)"/u
+
+ def namespaces=( namespaces )
+ Functions::namespace_context = namespaces
+ @namespaces = namespaces
+ end
+
+ def parse path
+ path = path.dup
+ path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
+ path.gsub!( /\s+([\]\)])/, '\1')
+ parsed = []
+ OrExpr(path, parsed)
+ parsed
+ end
+
+ def predicate path
+ parsed = []
+ Predicate( "[#{path}]", parsed )
+ parsed
+ end
+
+ def abbreviate( path )
+ path = path.kind_of?(String) ? parse( path ) : path
+ string = ""
+ document = false
+ while path.size > 0
+ op = path.shift
+ case op
+ when :node
+ when :attribute
+ string << "/" if string.size > 0
+ string << "@"
+ when :child
+ string << "/" if string.size > 0
+ when :descendant_or_self
+ string << "/"
+ when :self
+ string << "."
+ when :parent
+ string << ".."
+ when :any
+ string << "*"
+ when :text
+ string << "text()"
+ when :following, :following_sibling,
+ :ancestor, :ancestor_or_self, :descendant,
+ :namespace, :preceding, :preceding_sibling
+ string << "/" unless string.size == 0
+ string << op.to_s.tr("_", "-")
+ string << "::"
+ when :qname
+ prefix = path.shift
+ name = path.shift
+ string << prefix+":" if prefix.size > 0
+ string << name
+ when :predicate
+ string << '['
+ string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
+ string << ']'
+ when :document
+ document = true
+ when :function
+ string << path.shift
+ string << "( "
+ string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
+ string << " )"
+ when :literal
+ string << %Q{ "#{path.shift}" }
+ else
+ string << "/" unless string.size == 0
+ string << "UNKNOWN("
+ string << op.inspect
+ string << ")"
+ end
+ end
+ string = "/"+string if document
+ return string
+ end
+
+ def expand( path )
+ path = path.kind_of?(String) ? parse( path ) : path
+ string = ""
+ document = false
+ while path.size > 0
+ op = path.shift
+ case op
+ when :node
+ string << "node()"
+ when :attribute, :child, :following, :following_sibling,
+ :ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
+ :namespace, :preceding, :preceding_sibling, :self, :parent
+ string << "/" unless string.size == 0
+ string << op.to_s.tr("_", "-")
+ string << "::"
+ when :any
+ string << "*"
+ when :qname
+ prefix = path.shift
+ name = path.shift
+ string << prefix+":" if prefix.size > 0
+ string << name
+ when :predicate
+ string << '['
+ string << predicate_to_string( path.shift ) { |x| expand(x) }
+ string << ']'
+ when :document
+ document = true
+ else
+ string << "/" unless string.size == 0
+ string << "UNKNOWN("
+ string << op.inspect
+ string << ")"
+ end
+ end
+ string = "/"+string if document
+ return string
+ end
+
+ def predicate_to_string( path, &block )
+ string = ""
+ case path[0]
+ when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
+ op = path.shift
+ case op
+ when :eq
+ op = "="
+ when :lt
+ op = "<"
+ when :gt
+ op = ">"
+ when :lteq
+ op = "<="
+ when :gteq
+ op = ">="
+ when :neq
+ op = "!="
+ when :union
+ op = "|"
+ end
+ left = predicate_to_string( path.shift, &block )
+ right = predicate_to_string( path.shift, &block )
+ string << " "
+ string << left
+ string << " "
+ string << op.to_s
+ string << " "
+ string << right
+ string << " "
+ when :function
+ path.shift
+ name = path.shift
+ string << name
+ string << "( "
+ string << predicate_to_string( path.shift, &block )
+ string << " )"
+ when :literal
+ path.shift
+ string << " "
+ string << path.shift.inspect
+ string << " "
+ else
+ string << " "
+ string << yield( path )
+ string << " "
+ end
+ return string.squeeze(" ")
+ end
+
+ private
+ #LocationPath
+ # | RelativeLocationPath
+ # | '/' RelativeLocationPath?
+ # | '//' RelativeLocationPath
+ def LocationPath path, parsed
+ path = path.strip
+ if path[0] == ?/
+ parsed << :document
+ if path[1] == ?/
+ parsed << :descendant_or_self
+ parsed << :node
+ path = path[2..-1]
+ else
+ path = path[1..-1]
+ end
+ end
+ return RelativeLocationPath( path, parsed ) if path.size > 0
+ end
+
+ #RelativeLocationPath
+ # | Step
+ # | (AXIS_NAME '::' | '@' | '') AxisSpecifier
+ # NodeTest
+ # Predicate
+ # | '.' | '..' AbbreviatedStep
+ # | RelativeLocationPath '/' Step
+ # | RelativeLocationPath '//' Step
+ AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
+ def RelativeLocationPath path, parsed
+ while path.size > 0
+ # (axis or @ or <child::>) nodetest predicate >
+ # OR > / Step
+ # (. or ..) >
+ if path[0] == ?.
+ if path[1] == ?.
+ parsed << :parent
+ parsed << :node
+ path = path[2..-1]
+ else
+ parsed << :self
+ parsed << :node
+ path = path[1..-1]
+ end
+ else
+ if path[0] == ?@
+ parsed << :attribute
+ path = path[1..-1]
+ # Goto Nodetest
+ elsif path =~ AXIS
+ parsed << $1.tr('-','_').intern
+ path = $'
+ # Goto Nodetest
+ else
+ parsed << :child
+ end
+
+ n = []
+ path = NodeTest( path, n)
+
+ if path[0] == ?[
+ path = Predicate( path, n )
+ end
+
+ parsed.concat(n)
+ end
+
+ if path.size > 0
+ if path[0] == ?/
+ if path[1] == ?/
+ parsed << :descendant_or_self
+ parsed << :node
+ path = path[2..-1]
+ else
+ path = path[1..-1]
+ end
+ else
+ return path
+ end
+ end
+ end
+ return path
+ end
+
+ # Returns a 1-1 map of the nodeset
+ # The contents of the resulting array are either:
+ # true/false, if a positive match
+ # String, if a name match
+ #NodeTest
+ # | ('*' | NCNAME ':' '*' | QNAME) NameTest
+ # | NODE_TYPE '(' ')' NodeType
+ # | PI '(' LITERAL ')' PI
+ # | '[' expr ']' Predicate
+ NCNAMETEST= /^(#{NCNAME_STR}):\*/u
+ QNAME = Namespace::NAMESPLIT
+ NODE_TYPE = /^(comment|text|node)\(\s*\)/m
+ PI = /^processing-instruction\(/
+ def NodeTest path, parsed
+ case path
+ when /^\*/
+ path = $'
+ parsed << :any
+ when NODE_TYPE
+ type = $1
+ path = $'
+ parsed << type.tr('-', '_').intern
+ when PI
+ path = $'
+ literal = nil
+ if path !~ /^\s*\)/
+ path =~ LITERAL
+ literal = $1
+ path = $'
+ raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?)
+ path = path[1..-1]
+ end
+ parsed << :processing_instruction
+ parsed << (literal || '')
+ when NCNAMETEST
+ prefix = $1
+ path = $'
+ parsed << :namespace
+ parsed << prefix
+ when QNAME
+ prefix = $1
+ name = $2
+ path = $'
+ prefix = "" unless prefix
+ parsed << :qname
+ parsed << prefix
+ parsed << name
+ end
+ return path
+ end
+
+ # Filters the supplied nodeset on the predicate(s)
+ def Predicate path, parsed
+ return nil unless path[0] == ?[
+ predicates = []
+ while path[0] == ?[
+ path, expr = get_group(path)
+ predicates << expr[1..-2] if expr
+ end
+ predicates.each{ |pred|
+ preds = []
+ parsed << :predicate
+ parsed << preds
+ OrExpr(pred, preds)
+ }
+ path
+ end
+
+ # The following return arrays of true/false, a 1-1 mapping of the
+ # supplied nodeset, except for axe(), which returns a filtered
+ # nodeset
+
+ #| OrExpr S 'or' S AndExpr
+ #| AndExpr
+ def OrExpr path, parsed
+ n = []
+ rest = AndExpr( path, n )
+ if rest != path
+ while rest =~ /^\s*( or )/
+ n = [ :or, n, [] ]
+ rest = AndExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| AndExpr S 'and' S EqualityExpr
+ #| EqualityExpr
+ def AndExpr path, parsed
+ n = []
+ rest = EqualityExpr( path, n )
+ if rest != path
+ while rest =~ /^\s*( and )/
+ n = [ :and, n, [] ]
+ rest = EqualityExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| EqualityExpr ('=' | '!=') RelationalExpr
+ #| RelationalExpr
+ def EqualityExpr path, parsed
+ n = []
+ rest = RelationalExpr( path, n )
+ if rest != path
+ while rest =~ /^\s*(!?=)\s*/
+ if $1[0] == ?!
+ n = [ :neq, n, [] ]
+ else
+ n = [ :eq, n, [] ]
+ end
+ rest = RelationalExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
+ #| AdditiveExpr
+ def RelationalExpr path, parsed
+ n = []
+ rest = AdditiveExpr( path, n )
+ if rest != path
+ while rest =~ /^\s*([<>]=?)\s*/
+ if $1[0] == ?<
+ sym = "lt"
+ else
+ sym = "gt"
+ end
+ sym << "eq" if $1[-1] == ?=
+ n = [ sym.intern, n, [] ]
+ rest = AdditiveExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| AdditiveExpr ('+' | S '-') MultiplicativeExpr
+ #| MultiplicativeExpr
+ def AdditiveExpr path, parsed
+ n = []
+ rest = MultiplicativeExpr( path, n )
+ if rest != path
+ while rest =~ /^\s*(\+| -)\s*/
+ if $1[0] == ?+
+ n = [ :plus, n, [] ]
+ else
+ n = [ :minus, n, [] ]
+ end
+ rest = MultiplicativeExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
+ #| UnaryExpr
+ def MultiplicativeExpr path, parsed
+ n = []
+ rest = UnaryExpr( path, n )
+ if rest != path
+ while rest =~ /^\s*(\*| div | mod )\s*/
+ if $1[0] == ?*
+ n = [ :mult, n, [] ]
+ elsif $1.include?( "div" )
+ n = [ :div, n, [] ]
+ else
+ n = [ :mod, n, [] ]
+ end
+ rest = UnaryExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| '-' UnaryExpr
+ #| UnionExpr
+ def UnaryExpr path, parsed
+ path =~ /^(\-*)/
+ path = $'
+ if $1 and (($1.size % 2) != 0)
+ mult = -1
+ else
+ mult = 1
+ end
+ parsed << :neg if mult < 0
+
+ n = []
+ path = UnionExpr( path, n )
+ parsed.concat( n )
+ path
+ end
+
+ #| UnionExpr '|' PathExpr
+ #| PathExpr
+ def UnionExpr path, parsed
+ n = []
+ rest = PathExpr( path, n )
+ if rest != path
+ while rest =~ /^\s*(\|)\s*/
+ n = [ :union, n, [] ]
+ rest = PathExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace( n )
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| LocationPath
+ #| FilterExpr ('/' | '//') RelativeLocationPath
+ def PathExpr path, parsed
+ path =~ /^\s*/
+ path = $'
+ n = []
+ rest = FilterExpr( path, n )
+ if rest != path
+ if rest and rest[0] == ?/
+ return RelativeLocationPath(rest, n)
+ end
+ end
+ rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w*]/
+ parsed.concat(n)
+ return rest
+ end
+
+ #| FilterExpr Predicate
+ #| PrimaryExpr
+ def FilterExpr path, parsed
+ n = []
+ path = PrimaryExpr( path, n )
+ path = Predicate(path, n) if path and path[0] == ?[
+ parsed.concat(n)
+ path
+ end
+
+ #| VARIABLE_REFERENCE
+ #| '(' expr ')'
+ #| LITERAL
+ #| NUMBER
+ #| FunctionCall
+ VARIABLE_REFERENCE = /^\$(#{NAME_STR})/u
+ NUMBER = /^(\d*\.?\d+)/
+ NT = /^comment|text|processing-instruction|node$/
+ def PrimaryExpr path, parsed
+ case path
+ when VARIABLE_REFERENCE
+ varname = $1
+ path = $'
+ parsed << :variable
+ parsed << varname
+ #arry << @variables[ varname ]
+ when /^(\w[-\w]*)(?:\()/
+ fname = $1
+ tmp = $'
+ return path if fname =~ NT
+ path = tmp
+ parsed << :function
+ parsed << fname
+ path = FunctionCall(path, parsed)
+ when NUMBER
+ varname = $1.nil? ? $2 : $1
+ path = $'
+ parsed << :literal
+ parsed << (varname.include?('.') ? varname.to_f : varname.to_i)
+ when LITERAL
+ varname = $1.nil? ? $2 : $1
+ path = $'
+ parsed << :literal
+ parsed << varname
+ when /^\(/ #/
+ path, contents = get_group(path)
+ contents = contents[1..-2]
+ n = []
+ OrExpr( contents, n )
+ parsed.concat(n)
+ end
+ path
+ end
+
+ #| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
+ def FunctionCall rest, parsed
+ path, arguments = parse_args(rest)
+ argset = []
+ for argument in arguments
+ args = []
+ OrExpr( argument, args )
+ argset << args
+ end
+ parsed << argset
+ path
+ end
+
+ # get_group( '[foo]bar' ) -> ['bar', '[foo]']
+ def get_group string
+ ind = 0
+ depth = 0
+ st = string[0,1]
+ en = (st == "(" ? ")" : "]")
+ begin
+ case string[ind,1]
+ when st
+ depth += 1
+ when en
+ depth -= 1
+ end
+ ind += 1
+ end while depth > 0 and ind < string.length
+ return nil unless depth==0
+ [string[ind..-1], string[0..ind-1]]
+ end
+
+ def parse_args( string )
+ arguments = []
+ ind = 0
+ inquot = false
+ inapos = false
+ depth = 1
+ begin
+ case string[ind]
+ when ?"
+ inquot = !inquot unless inapos
+ when ?'
+ inapos = !inapos unless inquot
+ else
+ unless inquot or inapos
+ case string[ind]
+ when ?(
+ depth += 1
+ if depth == 1
+ string = string[1..-1]
+ ind -= 1
+ end
+ when ?)
+ depth -= 1
+ if depth == 0
+ s = string[0,ind].strip
+ arguments << s unless s == ""
+ string = string[ind+1..-1]
+ end
+ when ?,
+ if depth == 1
+ s = string[0,ind].strip
+ arguments << s unless s == ""
+ string = string[ind+1..-1]
+ ind = -1
+ end
+ end
+ end
+ end
+ ind += 1
+ end while depth > 0 and ind < string.length
+ return nil unless depth==0
+ [string,arguments]
+ end
+ end
+ end
+end