summaryrefslogtreecommitdiff
path: root/jni/ruby/lib/rexml/xpath_parser.rb
diff options
context:
space:
mode:
authorJari Vetoniemi <jari.vetoniemi@indooratlas.com>2020-03-16 18:49:26 +0900
committerJari Vetoniemi <jari.vetoniemi@indooratlas.com>2020-03-30 00:39:06 +0900
commitfcbf63e62c627deae76c1b8cb8c0876c536ed811 (patch)
tree64cb17de3f41a2b6fef2368028fbd00349946994 /jni/ruby/lib/rexml/xpath_parser.rb
Fresh start
Diffstat (limited to 'jni/ruby/lib/rexml/xpath_parser.rb')
-rw-r--r--jni/ruby/lib/rexml/xpath_parser.rb703
1 files changed, 703 insertions, 0 deletions
diff --git a/jni/ruby/lib/rexml/xpath_parser.rb b/jni/ruby/lib/rexml/xpath_parser.rb
new file mode 100644
index 0000000..ef49a32
--- /dev/null
+++ b/jni/ruby/lib/rexml/xpath_parser.rb
@@ -0,0 +1,703 @@
+require 'rexml/namespace'
+require 'rexml/xmltokens'
+require 'rexml/attribute'
+require 'rexml/syncenumerator'
+require 'rexml/parsers/xpathparser'
+
+class Object
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object types
+ def dclone
+ clone
+ end
+end
+class Symbol
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object types
+ def dclone ; self ; end
+end
+class Fixnum
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object types
+ def dclone ; self ; end
+end
+class Float
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object types
+ def dclone ; self ; end
+end
+class Array
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object+ types
+ def dclone
+ klone = self.clone
+ klone.clear
+ self.each{|v| klone << v.dclone}
+ klone
+ end
+end
+
+module REXML
+ # You don't want to use this class. Really. Use XPath, which is a wrapper
+ # for this class. Believe me. You don't want to poke around in here.
+ # There is strange, dark magic at work in this code. Beware. Go back! Go
+ # back while you still can!
+ class XPathParser
+ include XMLTokens
+ LITERAL = /^'([^']*)'|^"([^"]*)"/u
+
+ def initialize( )
+ @parser = REXML::Parsers::XPathParser.new
+ @namespaces = nil
+ @variables = {}
+ end
+
+ def namespaces=( namespaces={} )
+ Functions::namespace_context = namespaces
+ @namespaces = namespaces
+ end
+
+ def variables=( vars={} )
+ Functions::variables = vars
+ @variables = vars
+ end
+
+ def parse path, nodeset
+ path_stack = @parser.parse( path )
+ match( path_stack, nodeset )
+ end
+
+ def get_first path, nodeset
+ path_stack = @parser.parse( path )
+ first( path_stack, nodeset )
+ end
+
+ def predicate path, nodeset
+ path_stack = @parser.parse( path )
+ expr( path_stack, nodeset )
+ end
+
+ def []=( variable_name, value )
+ @variables[ variable_name ] = value
+ end
+
+
+ # Performs a depth-first (document order) XPath search, and returns the
+ # first match. This is the fastest, lightest way to return a single result.
+ #
+ # FIXME: This method is incomplete!
+ def first( path_stack, node )
+ return nil if path.size == 0
+
+ case path[0]
+ when :document
+ # do nothing
+ return first( path[1..-1], node )
+ when :child
+ for c in node.children
+ r = first( path[1..-1], c )
+ return r if r
+ end
+ when :qname
+ name = path[2]
+ if node.name == name
+ return node if path.size == 3
+ return first( path[3..-1], node )
+ else
+ return nil
+ end
+ when :descendant_or_self
+ r = first( path[1..-1], node )
+ return r if r
+ for c in node.children
+ r = first( path, c )
+ return r if r
+ end
+ when :node
+ return first( path[1..-1], node )
+ when :any
+ return first( path[1..-1], node )
+ end
+ return nil
+ end
+
+
+ def match( path_stack, nodeset )
+ r = expr( path_stack, nodeset )
+ r
+ end
+
+ private
+
+
+ # Returns a String namespace for a node, given a prefix
+ # The rules are:
+ #
+ # 1. Use the supplied namespace mapping first.
+ # 2. If no mapping was supplied, use the context node to look up the namespace
+ def get_namespace( node, prefix )
+ if @namespaces
+ return @namespaces[prefix] || ''
+ else
+ return node.namespace( prefix ) if node.node_type == :element
+ return ''
+ end
+ end
+
+
+ # Expr takes a stack of path elements and a set of nodes (either a Parent
+ # or an Array and returns an Array of matching nodes
+ ALL = [ :attribute, :element, :text, :processing_instruction, :comment ]
+ ELEMENTS = [ :element ]
+ def expr( path_stack, nodeset, context=nil )
+ node_types = ELEMENTS
+ return nodeset if path_stack.length == 0 || nodeset.length == 0
+ while path_stack.length > 0
+ if nodeset.length == 0
+ path_stack.clear
+ return []
+ end
+ case (op = path_stack.shift)
+ when :document
+ nodeset = [ nodeset[0].root_node ]
+
+ when :qname
+ prefix = path_stack.shift
+ name = path_stack.shift
+ nodeset.delete_if do |node|
+ # FIXME: This DOUBLES the time XPath searches take
+ ns = get_namespace( node, prefix )
+ if node.node_type == :element
+ if node.name == name
+ end
+ end
+ !(node.node_type == :element and
+ node.name == name and
+ node.namespace == ns )
+ end
+ node_types = ELEMENTS
+
+ when :any
+ nodeset.delete_if { |node| !node_types.include?(node.node_type) }
+
+ when :self
+ # This space left intentionally blank
+
+ when :processing_instruction
+ target = path_stack.shift
+ nodeset.delete_if do |node|
+ (node.node_type != :processing_instruction) or
+ ( target!='' and ( node.target != target ) )
+ end
+
+ when :text
+ nodeset.delete_if { |node| node.node_type != :text }
+
+ when :comment
+ nodeset.delete_if { |node| node.node_type != :comment }
+
+ when :node
+ # This space left intentionally blank
+ node_types = ALL
+
+ when :child
+ new_nodeset = []
+ nt = nil
+ nodeset.each do |node|
+ nt = node.node_type
+ new_nodeset += node.children if nt == :element or nt == :document
+ end
+ nodeset = new_nodeset
+ node_types = ELEMENTS
+
+ when :literal
+ return path_stack.shift
+
+ when :attribute
+ new_nodeset = []
+ case path_stack.shift
+ when :qname
+ prefix = path_stack.shift
+ name = path_stack.shift
+ for element in nodeset
+ if element.node_type == :element
+ attrib = element.attribute( name, get_namespace(element, prefix) )
+ new_nodeset << attrib if attrib
+ end
+ end
+ when :any
+ for element in nodeset
+ if element.node_type == :element
+ new_nodeset += element.attributes.to_a
+ end
+ end
+ end
+ nodeset = new_nodeset
+
+ when :parent
+ nodeset = nodeset.collect{|n| n.parent}.compact
+ #nodeset = expr(path_stack.dclone, nodeset.collect{|n| n.parent}.compact)
+ node_types = ELEMENTS
+
+ when :ancestor
+ new_nodeset = []
+ nodeset.each do |node|
+ while node.parent
+ node = node.parent
+ new_nodeset << node unless new_nodeset.include? node
+ end
+ end
+ nodeset = new_nodeset
+ node_types = ELEMENTS
+
+ when :ancestor_or_self
+ new_nodeset = []
+ nodeset.each do |node|
+ if node.node_type == :element
+ new_nodeset << node
+ while ( node.parent )
+ node = node.parent
+ new_nodeset << node unless new_nodeset.include? node
+ end
+ end
+ end
+ nodeset = new_nodeset
+ node_types = ELEMENTS
+
+ when :predicate
+ new_nodeset = []
+ subcontext = { :size => nodeset.size }
+ pred = path_stack.shift
+ nodeset.each_with_index { |node, index|
+ subcontext[ :node ] = node
+ subcontext[ :index ] = index+1
+ pc = pred.dclone
+ result = expr( pc, [node], subcontext )
+ result = result[0] if result.kind_of? Array and result.length == 1
+ if result.kind_of? Numeric
+ new_nodeset << node if result == (index+1)
+ elsif result.instance_of? Array
+ if result.size > 0 and result.inject(false) {|k,s| s or k}
+ new_nodeset << node if result.size > 0
+ end
+ else
+ new_nodeset << node if result
+ end
+ }
+ nodeset = new_nodeset
+=begin
+ predicate = path_stack.shift
+ ns = nodeset.clone
+ result = expr( predicate, ns )
+ if result.kind_of? Array
+ nodeset = result.zip(ns).collect{|m,n| n if m}.compact
+ else
+ nodeset = result ? nodeset : []
+ end
+=end
+
+ when :descendant_or_self
+ rv = descendant_or_self( path_stack, nodeset )
+ path_stack.clear
+ nodeset = rv
+ node_types = ELEMENTS
+
+ when :descendant
+ results = []
+ nt = nil
+ nodeset.each do |node|
+ nt = node.node_type
+ results += expr( path_stack.dclone.unshift( :descendant_or_self ),
+ node.children ) if nt == :element or nt == :document
+ end
+ nodeset = results
+ node_types = ELEMENTS
+
+ when :following_sibling
+ results = []
+ nodeset.each do |node|
+ next if node.parent.nil?
+ all_siblings = node.parent.children
+ current_index = all_siblings.index( node )
+ following_siblings = all_siblings[ current_index+1 .. -1 ]
+ results += expr( path_stack.dclone, following_siblings )
+ end
+ nodeset = results
+
+ when :preceding_sibling
+ results = []
+ nodeset.each do |node|
+ next if node.parent.nil?
+ all_siblings = node.parent.children
+ current_index = all_siblings.index( node )
+ preceding_siblings = all_siblings[ 0, current_index ].reverse
+ results += preceding_siblings
+ end
+ nodeset = results
+ node_types = ELEMENTS
+
+ when :preceding
+ new_nodeset = []
+ nodeset.each do |node|
+ new_nodeset += preceding( node )
+ end
+ nodeset = new_nodeset
+ node_types = ELEMENTS
+
+ when :following
+ new_nodeset = []
+ nodeset.each do |node|
+ new_nodeset += following( node )
+ end
+ nodeset = new_nodeset
+ node_types = ELEMENTS
+
+ when :namespace
+ new_nodeset = []
+ prefix = path_stack.shift
+ nodeset.each do |node|
+ if (node.node_type == :element or node.node_type == :attribute)
+ if @namespaces
+ namespaces = @namespaces
+ elsif (node.node_type == :element)
+ namespaces = node.namespaces
+ else
+ namespaces = node.element.namesapces
+ end
+ if (node.namespace == namespaces[prefix])
+ new_nodeset << node
+ end
+ end
+ end
+ nodeset = new_nodeset
+
+ when :variable
+ var_name = path_stack.shift
+ return @variables[ var_name ]
+
+ # :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
+ # TODO: Special case for :or and :and -- not evaluate the right
+ # operand if the left alone determines result (i.e. is true for
+ # :or and false for :and).
+ when :eq, :neq, :lt, :lteq, :gt, :gteq, :or
+ left = expr( path_stack.shift, nodeset.dup, context )
+ right = expr( path_stack.shift, nodeset.dup, context )
+ res = equality_relational_compare( left, op, right )
+ return res
+
+ when :and
+ left = expr( path_stack.shift, nodeset.dup, context )
+ return [] unless left
+ if left.respond_to?(:inject) and !left.inject(false) {|a,b| a | b}
+ return []
+ end
+ right = expr( path_stack.shift, nodeset.dup, context )
+ res = equality_relational_compare( left, op, right )
+ return res
+
+ when :div
+ left = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
+ right = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
+ return (left / right)
+
+ when :mod
+ left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
+ right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
+ return (left % right)
+
+ when :mult
+ left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
+ right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
+ return (left * right)
+
+ when :plus
+ left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
+ right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
+ return (left + right)
+
+ when :minus
+ left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
+ right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
+ return (left - right)
+
+ when :union
+ left = expr( path_stack.shift, nodeset, context )
+ right = expr( path_stack.shift, nodeset, context )
+ return (left | right)
+
+ when :neg
+ res = expr( path_stack, nodeset, context )
+ return -(res.to_f)
+
+ when :not
+ when :function
+ func_name = path_stack.shift.tr('-','_')
+ arguments = path_stack.shift
+ subcontext = context ? nil : { :size => nodeset.size }
+
+ res = []
+ cont = context
+ nodeset.each_with_index { |n, i|
+ if subcontext
+ subcontext[:node] = n
+ subcontext[:index] = i
+ cont = subcontext
+ end
+ arg_clone = arguments.dclone
+ args = arg_clone.collect { |arg|
+ expr( arg, [n], cont )
+ }
+ Functions.context = cont
+ res << Functions.send( func_name, *args )
+ }
+ return res
+
+ end
+ end # while
+ return nodeset
+ end
+
+
+ ##########################################################
+ # FIXME
+ # The next two methods are BAD MOJO!
+ # This is my achilles heel. If anybody thinks of a better
+ # way of doing this, be my guest. This really sucks, but
+ # it is a wonder it works at all.
+ # ########################################################
+
+ def descendant_or_self( path_stack, nodeset )
+ rs = []
+ d_o_s( path_stack, nodeset, rs )
+ document_order(rs.flatten.compact)
+ #rs.flatten.compact
+ end
+
+ def d_o_s( p, ns, r )
+ nt = nil
+ ns.each_index do |i|
+ n = ns[i]
+ x = expr( p.dclone, [ n ] )
+ nt = n.node_type
+ d_o_s( p, n.children, x ) if nt == :element or nt == :document and n.children.size > 0
+ r.concat(x) if x.size > 0
+ end
+ end
+
+
+ # Reorders an array of nodes so that they are in document order
+ # It tries to do this efficiently.
+ #
+ # FIXME: I need to get rid of this, but the issue is that most of the XPath
+ # interpreter functions as a filter, which means that we lose context going
+ # in and out of function calls. If I knew what the index of the nodes was,
+ # I wouldn't have to do this. Maybe add a document IDX for each node?
+ # Problems with mutable documents. Or, rewrite everything.
+ def document_order( array_of_nodes )
+ new_arry = []
+ array_of_nodes.each { |node|
+ node_idx = []
+ np = node.node_type == :attribute ? node.element : node
+ while np.parent and np.parent.node_type == :element
+ node_idx << np.parent.index( np )
+ np = np.parent
+ end
+ new_arry << [ node_idx.reverse, node ]
+ }
+ new_arry.sort{ |s1, s2| s1[0] <=> s2[0] }.collect{ |s| s[1] }
+ end
+
+
+ def recurse( nodeset, &block )
+ for node in nodeset
+ yield node
+ recurse( node, &block ) if node.node_type == :element
+ end
+ end
+
+
+
+ # Builds a nodeset of all of the preceding nodes of the supplied node,
+ # in reverse document order
+ # preceding:: includes every element in the document that precedes this node,
+ # except for ancestors
+ def preceding( node )
+ ancestors = []
+ p = node.parent
+ while p
+ ancestors << p
+ p = p.parent
+ end
+
+ acc = []
+ p = preceding_node_of( node )
+ while p
+ if ancestors.include? p
+ ancestors.delete(p)
+ else
+ acc << p
+ end
+ p = preceding_node_of( p )
+ end
+ acc
+ end
+
+ def preceding_node_of( node )
+ psn = node.previous_sibling_node
+ if psn.nil?
+ if node.parent.nil? or node.parent.class == Document
+ return nil
+ end
+ return node.parent
+ #psn = preceding_node_of( node.parent )
+ end
+ while psn and psn.kind_of? Element and psn.children.size > 0
+ psn = psn.children[-1]
+ end
+ psn
+ end
+
+ def following( node )
+ acc = []
+ p = next_sibling_node( node )
+ while p
+ acc << p
+ p = following_node_of( p )
+ end
+ acc
+ end
+
+ def following_node_of( node )
+ if node.kind_of? Element and node.children.size > 0
+ return node.children[0]
+ end
+ return next_sibling_node(node)
+ end
+
+ def next_sibling_node(node)
+ psn = node.next_sibling_node
+ while psn.nil?
+ if node.parent.nil? or node.parent.class == Document
+ return nil
+ end
+ node = node.parent
+ psn = node.next_sibling_node
+ end
+ return psn
+ end
+
+ def norm b
+ case b
+ when true, false
+ return b
+ when 'true', 'false'
+ return Functions::boolean( b )
+ when /^\d+(\.\d+)?$/
+ return Functions::number( b )
+ else
+ return Functions::string( b )
+ end
+ end
+
+ def equality_relational_compare( set1, op, set2 )
+ if set1.kind_of? Array and set2.kind_of? Array
+ if set1.size == 1 and set2.size == 1
+ set1 = set1[0]
+ set2 = set2[0]
+ elsif set1.size == 0 or set2.size == 0
+ nd = set1.size==0 ? set2 : set1
+ rv = nd.collect { |il| compare( il, op, nil ) }
+ return rv
+ else
+ res = []
+ SyncEnumerator.new( set1, set2 ).each { |i1, i2|
+ i1 = norm( i1 )
+ i2 = norm( i2 )
+ res << compare( i1, op, i2 )
+ }
+ return res
+ end
+ end
+ # If one is nodeset and other is number, compare number to each item
+ # in nodeset s.t. number op number(string(item))
+ # If one is nodeset and other is string, compare string to each item
+ # in nodeset s.t. string op string(item)
+ # If one is nodeset and other is boolean, compare boolean to each item
+ # in nodeset s.t. boolean op boolean(item)
+ if set1.kind_of? Array or set2.kind_of? Array
+ if set1.kind_of? Array
+ a = set1
+ b = set2
+ else
+ a = set2
+ b = set1
+ end
+
+ case b
+ when true, false
+ return a.collect {|v| compare( Functions::boolean(v), op, b ) }
+ when Numeric
+ return a.collect {|v| compare( Functions::number(v), op, b )}
+ when /^\d+(\.\d+)?$/
+ b = Functions::number( b )
+ return a.collect {|v| compare( Functions::number(v), op, b )}
+ else
+ b = Functions::string( b )
+ return a.collect { |v| compare( Functions::string(v), op, b ) }
+ end
+ else
+ # If neither is nodeset,
+ # If op is = or !=
+ # If either boolean, convert to boolean
+ # If either number, convert to number
+ # Else, convert to string
+ # Else
+ # Convert both to numbers and compare
+ s1 = set1.to_s
+ s2 = set2.to_s
+ if s1 == 'true' or s1 == 'false' or s2 == 'true' or s2 == 'false'
+ set1 = Functions::boolean( set1 )
+ set2 = Functions::boolean( set2 )
+ else
+ if op == :eq or op == :neq
+ if s1 =~ /^\d+(\.\d+)?$/ or s2 =~ /^\d+(\.\d+)?$/
+ set1 = Functions::number( s1 )
+ set2 = Functions::number( s2 )
+ else
+ set1 = Functions::string( set1 )
+ set2 = Functions::string( set2 )
+ end
+ else
+ set1 = Functions::number( set1 )
+ set2 = Functions::number( set2 )
+ end
+ end
+ return compare( set1, op, set2 )
+ end
+ return false
+ end
+
+ def compare a, op, b
+ case op
+ when :eq
+ a == b
+ when :neq
+ a != b
+ when :lt
+ a < b
+ when :lteq
+ a <= b
+ when :gt
+ a > b
+ when :gteq
+ a >= b
+ when :and
+ a and b
+ when :or
+ a or b
+ else
+ false
+ end
+ end
+ end
+end