summaryrefslogtreecommitdiff
path: root/jni/ruby/lib/rexml
diff options
context:
space:
mode:
authorJari Vetoniemi <jari.vetoniemi@indooratlas.com>2020-03-16 18:49:26 +0900
committerJari Vetoniemi <jari.vetoniemi@indooratlas.com>2020-03-30 00:39:06 +0900
commitfcbf63e62c627deae76c1b8cb8c0876c536ed811 (patch)
tree64cb17de3f41a2b6fef2368028fbd00349946994 /jni/ruby/lib/rexml
Fresh start
Diffstat (limited to 'jni/ruby/lib/rexml')
-rw-r--r--jni/ruby/lib/rexml/attlistdecl.rb62
-rw-r--r--jni/ruby/lib/rexml/attribute.rb191
-rw-r--r--jni/ruby/lib/rexml/cdata.rb67
-rw-r--r--jni/ruby/lib/rexml/child.rb96
-rw-r--r--jni/ruby/lib/rexml/comment.rb79
-rw-r--r--jni/ruby/lib/rexml/doctype.rb269
-rw-r--r--jni/ruby/lib/rexml/document.rb290
-rw-r--r--jni/ruby/lib/rexml/dtd/attlistdecl.rb10
-rw-r--r--jni/ruby/lib/rexml/dtd/dtd.rb46
-rw-r--r--jni/ruby/lib/rexml/dtd/elementdecl.rb17
-rw-r--r--jni/ruby/lib/rexml/dtd/entitydecl.rb56
-rw-r--r--jni/ruby/lib/rexml/dtd/notationdecl.rb39
-rw-r--r--jni/ruby/lib/rexml/element.rb1240
-rw-r--r--jni/ruby/lib/rexml/encoding.rb50
-rw-r--r--jni/ruby/lib/rexml/entity.rb173
-rw-r--r--jni/ruby/lib/rexml/formatters/default.rb111
-rw-r--r--jni/ruby/lib/rexml/formatters/pretty.rb141
-rw-r--r--jni/ruby/lib/rexml/formatters/transitive.rb57
-rw-r--r--jni/ruby/lib/rexml/functions.rb394
-rw-r--r--jni/ruby/lib/rexml/instruction.rb70
-rw-r--r--jni/ruby/lib/rexml/light/node.rb195
-rw-r--r--jni/ruby/lib/rexml/namespace.rb47
-rw-r--r--jni/ruby/lib/rexml/node.rb75
-rw-r--r--jni/ruby/lib/rexml/output.rb29
-rw-r--r--jni/ruby/lib/rexml/parent.rb165
-rw-r--r--jni/ruby/lib/rexml/parseexception.rb51
-rw-r--r--jni/ruby/lib/rexml/parsers/baseparser.rb532
-rw-r--r--jni/ruby/lib/rexml/parsers/lightparser.rb58
-rw-r--r--jni/ruby/lib/rexml/parsers/pullparser.rb196
-rw-r--r--jni/ruby/lib/rexml/parsers/sax2parser.rb272
-rw-r--r--jni/ruby/lib/rexml/parsers/streamparser.rb52
-rw-r--r--jni/ruby/lib/rexml/parsers/treeparser.rb100
-rw-r--r--jni/ruby/lib/rexml/parsers/ultralightparser.rb56
-rw-r--r--jni/ruby/lib/rexml/parsers/xpathparser.rb656
-rw-r--r--jni/ruby/lib/rexml/quickpath.rb265
-rw-r--r--jni/ruby/lib/rexml/rexml.rb31
-rw-r--r--jni/ruby/lib/rexml/sax2listener.rb97
-rw-r--r--jni/ruby/lib/rexml/security.rb27
-rw-r--r--jni/ruby/lib/rexml/source.rb296
-rw-r--r--jni/ruby/lib/rexml/streamlistener.rb92
-rw-r--r--jni/ruby/lib/rexml/syncenumerator.rb32
-rw-r--r--jni/ruby/lib/rexml/text.rb425
-rw-r--r--jni/ruby/lib/rexml/undefinednamespaceexception.rb8
-rw-r--r--jni/ruby/lib/rexml/validation/relaxng.rb538
-rw-r--r--jni/ruby/lib/rexml/validation/validation.rb143
-rw-r--r--jni/ruby/lib/rexml/validation/validationexception.rb9
-rw-r--r--jni/ruby/lib/rexml/xmldecl.rb115
-rw-r--r--jni/ruby/lib/rexml/xmltokens.rb84
-rw-r--r--jni/ruby/lib/rexml/xpath.rb80
-rw-r--r--jni/ruby/lib/rexml/xpath_parser.rb703
50 files changed, 8887 insertions, 0 deletions
diff --git a/jni/ruby/lib/rexml/attlistdecl.rb b/jni/ruby/lib/rexml/attlistdecl.rb
new file mode 100644
index 0000000..ec4e6c3
--- /dev/null
+++ b/jni/ruby/lib/rexml/attlistdecl.rb
@@ -0,0 +1,62 @@
+#vim:ts=2 sw=2 noexpandtab:
+require 'rexml/child'
+require 'rexml/source'
+
+module REXML
+ # This class needs:
+ # * Documentation
+ # * Work! Not all types of attlists are intelligently parsed, so we just
+ # spew back out what we get in. This works, but it would be better if
+ # we formatted the output ourselves.
+ #
+ # AttlistDecls provide *just* enough support to allow namespace
+ # declarations. If you need some sort of generalized support, or have an
+ # interesting idea about how to map the hideous, terrible design of DTD
+ # AttlistDecls onto an intuitive Ruby interface, let me know. I'm desperate
+ # for anything to make DTDs more palatable.
+ class AttlistDecl < Child
+ include Enumerable
+
+ # What is this? Got me.
+ attr_reader :element_name
+
+ # Create an AttlistDecl, pulling the information from a Source. Notice
+ # that this isn't very convenient; to create an AttlistDecl, you basically
+ # have to format it yourself, and then have the initializer parse it.
+ # Sorry, but for the foreseeable future, DTD support in REXML is pretty
+ # weak on convenience. Have I mentioned how much I hate DTDs?
+ def initialize(source)
+ super()
+ if (source.kind_of? Array)
+ @element_name, @pairs, @contents = *source
+ end
+ end
+
+ # Access the attlist attribute/value pairs.
+ # value = attlist_decl[ attribute_name ]
+ def [](key)
+ @pairs[key]
+ end
+
+ # Whether an attlist declaration includes the given attribute definition
+ # if attlist_decl.include? "xmlns:foobar"
+ def include?(key)
+ # Ask the table directly instead of materialising an array of every key
+ # just to scan it. Assumes @pairs is a Hash, as its use in #[] and #each
+ # (attribute name => value) suggests.
+ @pairs.key?(key)
+ end
+
+ # Iterate over the key/value pairs:
+ # attlist_decl.each { |attribute_name, attribute_value| ... }
+ def each(&block)
+ @pairs.each(&block)
+ end
+
+ # Write out exactly what we got in.
+ def write out, indent=-1
+ out << @contents
+ end
+
+ def node_type
+ :attlistdecl
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/attribute.rb b/jni/ruby/lib/rexml/attribute.rb
new file mode 100644
index 0000000..ef9e544
--- /dev/null
+++ b/jni/ruby/lib/rexml/attribute.rb
@@ -0,0 +1,191 @@
+require "rexml/namespace"
+require 'rexml/text'
+
+module REXML
+ # Defines an Element Attribute; i.e., an attribute=value pair, as in:
+ # <element attribute="value"/>. Attributes can be in their own
+ # namespaces. General users of REXML will not interact with the
+ # Attribute class much.
+ class Attribute
+ include Node
+ include Namespace
+
+ # The element to which this attribute belongs
+ attr_reader :element
+ # The normalized value of this attribute. That is, the attribute with
+ # entities intact.
+ attr_writer :normalized
+ PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
+
+ NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
+
+ # Constructor.
+ # FIXME: The parser doesn't catch illegal characters in attributes
+ #
+ # first::
+ # Either: an Attribute, which this new attribute will become a
+ # clone of; or a String, which is the name of this attribute
+ # second::
+ # If +first+ is an Attribute, then this may be an Element, or nil.
+ # If nil, then the Element parent of this attribute is the parent
+ # of the +first+ Attribute. If the first argument is a String,
+ # then this must also be a String, and is the content of the attribute.
+ # If this is the content, it must be fully normalized (contain no
+ # illegal characters).
+ # parent::
+ # Ignored unless +first+ is a String; otherwise, may be the Element
+ # parent of this attribute, or nil.
+ #
+ #
+ # Attribute.new( attribute_to_clone )
+ # Attribute.new( attribute_to_clone, parent_element )
+ # Attribute.new( "attr", "attr_value" )
+ # Attribute.new( "attr", "attr_value", parent_element )
+ def initialize( first, second=nil, parent=nil )
+ @normalized = @unnormalized = @element = nil
+ if first.kind_of? Attribute
+ self.name = first.expanded_name
+ @unnormalized = first.value
+ if second.kind_of? Element
+ @element = second
+ else
+ @element = first.element
+ end
+ elsif first.kind_of? String
+ @element = parent
+ self.name = first
+ @normalized = second.to_s
+ else
+ raise "illegal argument #{first.class.name} to Attribute constructor"
+ end
+ end
+
+ # Returns the namespace of the attribute.
+ #
+ # e = Element.new( "elns:myelement" )
+ # e.add_attribute( "nsa:a", "aval" )
+ # e.add_attribute( "b", "bval" )
+ # e.attributes.get_attribute( "a" ).prefix # -> "nsa"
+ # e.attributes.get_attribute( "b" ).prefix # -> "elns"
+ # a = Attribute.new( "x", "y" )
+ # a.prefix # -> ""
+ def prefix
+ pf = super
+ if pf == ""
+ pf = @element.prefix if @element
+ end
+ pf
+ end
+
+ # Returns the namespace URL, if defined, or nil otherwise
+ #
+ # e = Element.new("el")
+ # e.add_namespace("ns", "http://url")
+ # e.add_attribute("ns:a", "b")
+ # e.add_attribute("nsx:a", "c")
+ # e.attribute("ns:a").namespace # => "http://url"
+ # e.attribute("nsx:a").namespace # => nil
+ def namespace arg=nil
+ arg = prefix if arg.nil?
+ @element.namespace arg
+ end
+
+ # Returns true if other is an Attribute and has the same name and value,
+ # false otherwise.
+ def ==( other )
+ other.kind_of?(Attribute) and other.name==name and other.value==value
+ end
+
+ # Creates (and returns) a hash from both the name and value
+ def hash
+ name.hash + value.hash
+ end
+
+ # Returns this attribute out as XML source, expanding the name
+ #
+ # a = Attribute.new( "x", "y" )
+ # a.to_string # -> "x='y'"
+ # b = Attribute.new( "ns:x", "y" )
+ # b.to_string # -> "ns:x='y'"
+ def to_string
+ # Escape whichever quote character delimits the value. The predefined
+ # XML entity for a double quote is &quot; -- "&quote;" is not defined and
+ # would produce output that a conforming parser rejects.
+ if @element and @element.context and @element.context[:attribute_quote] == :quote
+ %Q^#@expanded_name="#{to_s().gsub(/"/, '&quot;')}"^
+ else
+ "#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
+ end
+ end
+
+ def doctype
+ if @element
+ doc = @element.document
+ doc.doctype if doc
+ end
+ end
+
+ # Returns the attribute value, with entities replaced
+ def to_s
+ return @normalized if @normalized
+
+ @normalized = Text::normalize( @unnormalized, doctype )
+ @unnormalized = nil
+ @normalized
+ end
+
+ # Returns the UNNORMALIZED value of this attribute. That is, entities
+ # have been expanded to their values
+ def value
+ return @unnormalized if @unnormalized
+ @unnormalized = Text::unnormalize( @normalized, doctype )
+ @normalized = nil
+ @unnormalized
+ end
+
+ # Returns a copy of this attribute
+ def clone
+ Attribute.new self
+ end
+
+ # Sets the element of which this object is an attribute. Normally, this
+ # is not directly called.
+ #
+ # Returns this attribute
+ def element=( element )
+ @element = element
+
+ if @normalized
+ Text.check( @normalized, NEEDS_A_SECOND_CHECK, doctype )
+ end
+
+ self
+ end
+
+ # Removes this Attribute from the tree, and returns true if successful
+ #
+ # This method is usually not called directly.
+ def remove
+ @element.attributes.delete self.name unless @element.nil?
+ end
+
+ # Writes this attribute (EG, puts 'key="value"' to the output)
+ def write( output, indent=-1 )
+ output << to_string
+ end
+
+ def node_type
+ :attribute
+ end
+
+ def inspect
+ rv = ""
+ write( rv )
+ rv
+ end
+
+ def xpath
+ path = @element.xpath
+ path += "/@#{self.expanded_name}"
+ return path
+ end
+ end
+end
+#vim:ts=2 sw=2 noexpandtab:
diff --git a/jni/ruby/lib/rexml/cdata.rb b/jni/ruby/lib/rexml/cdata.rb
new file mode 100644
index 0000000..73358ed
--- /dev/null
+++ b/jni/ruby/lib/rexml/cdata.rb
@@ -0,0 +1,67 @@
+require "rexml/text"
+
+module REXML
+ class CData < Text
+ START = '<![CDATA['
+ STOP = ']]>'
+ ILLEGAL = /(\]\]>)/
+
+ # Constructor. CData is data between <![CDATA[ ... ]]>
+ #
+ # _Examples_
+ # CData.new( source )
+ # CData.new( "Here is some CDATA" )
+ # CData.new( "Some unprocessed data", respect_whitespace_TF, parent_element )
+ def initialize( first, whitespace=true, parent=nil )
+ super( first, whitespace, parent, false, true, ILLEGAL )
+ end
+
+ # Make a copy of this object
+ #
+ # _Examples_
+ # c = CData.new( "Some text" )
+ # d = c.clone
+ # d.to_s # -> "Some text"
+ def clone
+ CData.new self
+ end
+
+ # Returns the content of this CData object
+ #
+ # _Examples_
+ # c = CData.new( "Some text" )
+ # c.to_s # -> "Some text"
+ def to_s
+ @string
+ end
+
+ def value
+ @string
+ end
+
+ # == DEPRECATED
+ # See the rexml/formatters package
+ #
+ # Generates XML output of this object
+ #
+ # output::
+ # Where to write the string. Defaults to $stdout
+ # indent::
+ # The amount to indent this node by
+ # transitive::
+ # Ignored
+ # ie_hack::
+ # Ignored
+ #
+ # _Examples_
+ # c = CData.new( " Some text " )
+ # c.write( $stdout ) #-> <![CDATA[ Some text ]]>
+ def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
+ Kernel.warn( "#{self.class.name}.write is deprecated" )
+ indent( output, indent )
+ output << START
+ output << @string
+ output << STOP
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/child.rb b/jni/ruby/lib/rexml/child.rb
new file mode 100644
index 0000000..bf97d5f
--- /dev/null
+++ b/jni/ruby/lib/rexml/child.rb
@@ -0,0 +1,96 @@
+require "rexml/node"
+
+module REXML
+ ##
+ # A Child object is something contained by a parent, and this class
+ # contains methods to support that. Most user code will not use this
+ # class directly.
+ class Child
+ include Node
+ attr_reader :parent # The Parent of this object
+
+ # Constructor. Any inheritors of this class should call super to make
+ # sure this method is called.
+ # parent::
+ # if supplied, the parent of this child will be set to the
+ # supplied value, and self will be added to the parent
+ def initialize( parent = nil )
+ @parent = nil
+ # Declare @parent, but don't define it. The next line sets the
+ # parent.
+ parent.add( self ) if parent
+ end
+
+ # Replaces this object with another object. Basically, calls
+ # Parent.replace_child
+ #
+ # Returns:: self
+ def replace_with( child )
+ @parent.replace_child( self, child )
+ self
+ end
+
+ # Removes this child from the parent.
+ #
+ # Returns:: self
+ def remove
+ unless @parent.nil?
+ @parent.delete self
+ end
+ self
+ end
+
+ # Sets the parent of this child to the supplied argument.
+ #
+ # other::
+ # Must be a Parent object. If this object is the same object as the
+ # existing parent of this child, no action is taken. Otherwise, this
+ # child is removed from the current parent (if one exists), and is added
+ # to the new parent.
+ # Returns:: The parent added
+ def parent=( other )
+ return @parent if @parent == other
+ @parent.delete self if defined? @parent and @parent
+ @parent = other
+ end
+
+ alias :next_sibling :next_sibling_node
+ alias :previous_sibling :previous_sibling_node
+
+ # Sets the next sibling of this child. This can be used to insert a child
+ # after some other child.
+ # a = Element.new("a")
+ # b = a.add_element("b")
+ # c = Element.new("c")
+ # b.next_sibling = c
+ # # => <a><b/><c/></a>
+ def next_sibling=( other )
+ parent.insert_after self, other
+ end
+
+ # Sets the previous sibling of this child. This can be used to insert a
+ # child before some other child.
+ # a = Element.new("a")
+ # b = a.add_element("b")
+ # c = Element.new("c")
+ # b.previous_sibling = c
+ # # => <a><c/><b/></a>
+ def previous_sibling=(other)
+ parent.insert_before self, other
+ end
+
+ # Returns:: the document this child belongs to, or nil if this child
+ # belongs to no document
+ def document
+ return parent.document unless parent.nil?
+ nil
+ end
+
+ # This doesn't yet handle encodings
+ def bytes
+ document.encoding
+
+ to_s
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/comment.rb b/jni/ruby/lib/rexml/comment.rb
new file mode 100644
index 0000000..000b03d
--- /dev/null
+++ b/jni/ruby/lib/rexml/comment.rb
@@ -0,0 +1,79 @@
+require "rexml/child"
+
+module REXML
+ ##
+ # Represents an XML comment; that is, text between \<!-- ... -->
+ class Comment < Child
+ include Comparable
+ START = "<!--"
+ STOP = "-->"
+
+ # The content text
+
+ attr_accessor :string
+
+ ##
+ # Constructor. The first argument can be one of three types:
+ # @param first If String, the contents of this comment are set to the
+ # argument. If Comment, the argument is duplicated. If
+ # Source, the argument is scanned for a comment.
+ # @param second If the first argument is a Source, this argument
+ # should be nil, not supplied, or a Parent to be set as the parent
+ # of this object
+ def initialize( first, second = nil )
+ super(second)
+ if first.kind_of? String
+ @string = first
+ elsif first.kind_of? Comment
+ @string = first.string
+ end
+ end
+
+ def clone
+ Comment.new self
+ end
+
+ # == DEPRECATED
+ # See REXML::Formatters
+ #
+ # output::
+ # Where to write the string
+ # indent::
+ # An integer. If -1, no indenting will be used; otherwise, the
+ # indentation will be this number of spaces, and children will be
+ # indented an additional amount.
+ # transitive::
+ # Ignored by this class. The contents of comments are never modified.
+ # ie_hack::
+ # Needed for conformity to the child API, but not used by this class.
+ def write( output, indent=-1, transitive=false, ie_hack=false )
+ Kernel.warn("Comment.write is deprecated. See REXML::Formatters")
+ indent( output, indent )
+ output << START
+ output << @string
+ output << STOP
+ end
+
+ alias :to_s :string
+
+ ##
+ # Compares this Comment to another; the contents of the comment are used
+ # in the comparison.
+ def <=>(other)
+ # Receiver goes on the left: Comparable derives <, >, between? and sort
+ # order from the sign of (self <=> other), so swapping the operands
+ # would invert every ordering. Equality (result 0) is unaffected.
+ @string <=> other.to_s
+ end
+
+ ##
+ # Compares this Comment to another; the contents of the comment are used
+ # in the comparison.
+ def ==( other )
+ other.kind_of? Comment and
+ (other <=> self) == 0
+ end
+
+ def node_type
+ :comment
+ end
+ end
+end
+#vim:ts=2 sw=2 noexpandtab:
diff --git a/jni/ruby/lib/rexml/doctype.rb b/jni/ruby/lib/rexml/doctype.rb
new file mode 100644
index 0000000..0b3c533
--- /dev/null
+++ b/jni/ruby/lib/rexml/doctype.rb
@@ -0,0 +1,269 @@
+require "rexml/parent"
+require "rexml/parseexception"
+require "rexml/namespace"
+require 'rexml/entity'
+require 'rexml/attlistdecl'
+require 'rexml/xmltokens'
+
+module REXML
+ # Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
+ # ... >. DOCTYPES can be used to declare the DTD of a document, as well as
+ # being used to declare entities used in the document.
+ class DocType < Parent
+ include XMLTokens
+ START = "<!DOCTYPE"
+ STOP = ">"
+ SYSTEM = "SYSTEM"
+ PUBLIC = "PUBLIC"
+ DEFAULT_ENTITIES = {
+ 'gt'=>EntityConst::GT,
+ 'lt'=>EntityConst::LT,
+ 'quot'=>EntityConst::QUOT,
+ "apos"=>EntityConst::APOS
+ }
+
+ # name is the name of the doctype
+ # external_id is the referenced DTD, if given
+ attr_reader :name, :external_id, :entities, :namespaces
+
+ # Constructor
+ #
+ # dt = DocType.new( 'foo', '-//I/Hate/External/IDs' )
+ # # <!DOCTYPE foo '-//I/Hate/External/IDs'>
+ # dt = DocType.new( doctype_to_clone )
+ # # Incomplete. Shallow clone of doctype
+ #
+ # +Note+ that the constructor:
+ #
+ # Doctype.new( Source.new( "<!DOCTYPE foo 'bar'>" ) )
+ #
+ # is _deprecated_. Do not use it. It will probably disappear.
+ def initialize( first, parent=nil )
+ @entities = DEFAULT_ENTITIES
+ @long_name = @uri = nil
+ if first.kind_of? String
+ super()
+ @name = first
+ @external_id = parent
+ elsif first.kind_of? DocType
+ super( parent )
+ @name = first.name
+ @external_id = first.external_id
+ elsif first.kind_of? Array
+ super( parent )
+ @name = first[0]
+ @external_id = first[1]
+ @long_name = first[2]
+ @uri = first[3]
+ elsif first.kind_of? Source
+ super( parent )
+ parser = Parsers::BaseParser.new( first )
+ event = parser.pull
+ if event[0] == :start_doctype
+ @name, @external_id, @long_name, @uri, = event[1..-1]
+ end
+ else
+ super()
+ end
+ end
+
+ def node_type
+ :doctype
+ end
+
+ def attributes_of element
+ rv = []
+ each do |child|
+ child.each do |key,val|
+ rv << Attribute.new(key,val)
+ end if child.kind_of? AttlistDecl and child.element_name == element
+ end
+ rv
+ end
+
+ def attribute_of element, attribute
+ att_decl = find do |child|
+ child.kind_of? AttlistDecl and
+ child.element_name == element and
+ child.include? attribute
+ end
+ return nil unless att_decl
+ att_decl[attribute]
+ end
+
+ def clone
+ DocType.new self
+ end
+
+ # output::
+ # Where to write the string
+ # indent::
+ # An integer. If -1, no indentation will be used; otherwise, the
+ # indentation will be this number of spaces, and children will be
+ # indented an additional amount.
+ # transitive::
+ # Ignored
+ # ie_hack::
+ # Ignored
+ def write( output, indent=0, transitive=false, ie_hack=false )
+ f = REXML::Formatters::Default.new
+ indent( output, indent )
+ output << START
+ output << ' '
+ output << @name
+ output << " #@external_id" if @external_id
+ output << " #{@long_name.inspect}" if @long_name
+ output << " #{@uri.inspect}" if @uri
+ unless @children.empty?
+ output << ' ['
+ @children.each { |child|
+ output << "\n"
+ f.write( child, output )
+ }
+ output << "\n]"
+ end
+ output << STOP
+ end
+
+ def context
+ # Returns the parent's context, or nil when this DocType has not been
+ # attached to a parent, instead of raising NoMethodError on nil.
+ @parent.context if @parent
+ end
+
+ def entity( name )
+ @entities[name].unnormalized if @entities[name]
+ end
+
+ def add child
+ super(child)
+ # DEFAULT_ENTITIES is shared by every DocType, so switch to a private
+ # copy before registering anything. Compare by identity: a private copy
+ # that has not gained an entity yet is still == to the default table and
+ # would otherwise be cloned again on every call.
+ @entities = DEFAULT_ENTITIES.clone if @entities.equal?(DEFAULT_ENTITIES)
+ # Only Entity children are indexed by name; other children are just stored.
+ @entities[ child.name ] = child if child.kind_of? Entity
+ end
+
+ # This method retrieves the public identifier identifying the document's
+ # DTD.
+ #
+ # Method contributed by Henrik Martensson
+ def public
+ case @external_id
+ when "SYSTEM"
+ nil
+ when "PUBLIC"
+ strip_quotes(@long_name)
+ end
+ end
+
+ # This method retrieves the system identifier identifying the document's DTD
+ #
+ # Method contributed by Henrik Martensson
+ def system
+ case @external_id
+ when "SYSTEM"
+ strip_quotes(@long_name)
+ when "PUBLIC"
+ @uri.kind_of?(String) ? strip_quotes(@uri) : nil
+ end
+ end
+
+ # This method returns a list of notations that have been declared in the
+ # _internal_ DTD subset. Notations in the external DTD subset are not
+ # listed.
+ #
+ # Method contributed by Henrik Martensson
+ def notations
+ children().select {|node| node.kind_of?(REXML::NotationDecl)}
+ end
+
+ # Retrieves a named notation. Only notations declared in the internal
+ # DTD subset can be retrieved.
+ #
+ # Method contributed by Henrik Martensson
+ def notation(name)
+ notations.find { |notation_decl|
+ notation_decl.name == name
+ }
+ end
+
+ private
+
+ # Method contributed by Henrik Martensson
+ def strip_quotes(quoted_string)
+ quoted_string =~ /^[\'\"].*[\'\"]$/ ?
+ quoted_string[1, quoted_string.length-2] :
+ quoted_string
+ end
+ end
+
+ # We don't really handle any of these since we're not a validating
+ # parser, so we can be pretty dumb about them. All we need to be able
+ # to do is spew them back out on a write()
+
+ # This is an abstract class. You never use this directly; it serves as a
+ # parent class for the specific declarations.
+ class Declaration < Child
+ def initialize src
+ super()
+ @string = src
+ end
+
+ def to_s
+ @string+'>'
+ end
+
+ # == DEPRECATED
+ # See REXML::Formatters
+ #
+ def write( output, indent=-1 )
+ # indent is accepted for API compatibility only and is not used; it
+ # defaults to -1 like the other declaration writers in this file.
+ output << to_s
+ end
+ end
+
+ public
+ class ElementDecl < Declaration
+ def initialize( src )
+ super
+ end
+ end
+
+ class ExternalEntity < Child
+ def initialize( src )
+ super()
+ @entity = src
+ end
+ def to_s
+ @entity
+ end
+ def write( output, indent=-1 )
+ # Emits the stored entity text verbatim. indent is unused and defaults
+ # to -1 for consistency with the other declaration writers in this file.
+ output << @entity
+ end
+ end
+
+ class NotationDecl < Child
+ attr_accessor :public, :system
+ def initialize name, middle, pub, sys
+ super(nil)
+ @name = name
+ @middle = middle
+ @public = pub
+ @system = sys
+ end
+
+ def to_s
+ notation = "<!NOTATION #{@name} #{@middle}"
+ notation << " #{@public.inspect}" if @public
+ notation << " #{@system.inspect}" if @system
+ notation << ">"
+ notation
+ end
+
+ def write( output, indent=-1 )
+ output << to_s
+ end
+
+ # This method retrieves the name of the notation.
+ #
+ # Method contributed by Henrik Martensson
+ def name
+ @name
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/document.rb b/jni/ruby/lib/rexml/document.rb
new file mode 100644
index 0000000..d7d24f4
--- /dev/null
+++ b/jni/ruby/lib/rexml/document.rb
@@ -0,0 +1,290 @@
+require "rexml/security"
+require "rexml/element"
+require "rexml/xmldecl"
+require "rexml/source"
+require "rexml/comment"
+require "rexml/doctype"
+require "rexml/instruction"
+require "rexml/rexml"
+require "rexml/parseexception"
+require "rexml/output"
+require "rexml/parsers/baseparser"
+require "rexml/parsers/streamparser"
+require "rexml/parsers/treeparser"
+
+module REXML
+ # Represents a full XML document, including PIs, a doctype, etc. A
+ # Document has a single child that can be accessed by root().
+ # Note that if you want to have an XML declaration written for a document
+ # you create, you must add one; REXML documents do not write a default
+ # declaration for you. See |DECLARATION| and |write|.
+ class Document < Element
+ # A convenient default XML declaration. If you want an XML declaration,
+ # the easiest way to add one is mydoc << Document::DECLARATION
+ # +DEPRECATED+
+ # Use: mydoc << XMLDecl.default
+ DECLARATION = XMLDecl.default
+
+ # Constructor
+ # @param source if supplied, must be a Document, String, or IO.
+ # Documents have their context and Element attributes cloned.
+ # Strings are expected to be valid XML documents. IOs are expected
+ # to be sources of valid XML documents.
+ # @param context if supplied, contains the context of the document;
+ # this should be a Hash.
+ def initialize( source = nil, context = {} )
+ @entity_expansion_count = 0
+ super()
+ @context = context
+ return if source.nil?
+ if source.kind_of? Document
+ @context = source.context
+ super source
+ else
+ build( source )
+ end
+ end
+
+ def node_type
+ :document
+ end
+
+ # Should be obvious
+ def clone
+ Document.new self
+ end
+
+ # According to the XML spec, a root node has no expanded name
+ def expanded_name
+ ''
+ #d = doc_type
+ #d ? d.name : "UNDEFINED"
+ end
+
+ alias :name :expanded_name
+
+ # We override this, because XMLDecls and DocTypes must go at the start
+ # of the document
+ def add( child )
+ if child.kind_of? XMLDecl
+ # An XML declaration always occupies slot 0, replacing any existing one.
+ if @children[0].kind_of? XMLDecl
+ @children[0] = child
+ else
+ @children.unshift child
+ end
+ child.parent = self
+ elsif child.kind_of? DocType
+ # Find first Element or DocType node and insert the decl right
+ # before it. If there is no such node, just insert the child at the
+ # end. If there is a child and it is an DocType, then replace it.
+ insert_before_index = @children.find_index { |x|
+ x.kind_of?(Element) || x.kind_of?(DocType)
+ }
+ if insert_before_index # Not null = not end of list
+ if @children[ insert_before_index ].kind_of? DocType
+ @children[ insert_before_index ] = child
+ else
+ # ary[i, 0] = x inserts x at position i, i.e. directly before the
+ # node found above. Using i-1 placed the DocType one slot too early
+ # (ahead of an XMLDecl), and for i == 0 the index -1 inserted it
+ # before the *last* child, possibly after the root element.
+ @children[ insert_before_index, 0 ] = child
+ end
+ else # Insert at end of list
+ @children << child
+ end
+ child.parent = self
+ else
+ # Everything else goes through the inherited add; a document may hold
+ # only one root element, so reject a second one after the fact.
+ rv = super
+ raise "attempted adding second root element to document" if @elements.size > 1
+ rv
+ end
+ end
+ alias :<< :add
+
+ def add_element(arg=nil, arg2=nil)
+ rv = super
+ raise "attempted adding second root element to document" if @elements.size > 1
+ rv
+ end
+
+ # @return the root Element of the document, or nil if this document
+ # has no children.
+ def root
+ elements[1]
+ #self
+ #@children.find { |item| item.kind_of? Element }
+ end
+
+ # @return the DocType child of the document, if one exists,
+ # and nil otherwise.
+ def doctype
+ @children.find { |item| item.kind_of? DocType }
+ end
+
+ # @return the XMLDecl of this document; if no XMLDecl has been
+ # set, the default declaration is returned.
+ def xml_decl
+ rv = @children[0]
+ return rv if rv.kind_of? XMLDecl
+ @children.unshift(XMLDecl.default)[0]
+ end
+
+ # @return the XMLDecl version of this document as a String.
+ # If no XMLDecl has been set, returns the default version.
+ def version
+ xml_decl().version
+ end
+
+ # @return the XMLDecl encoding of this document as an
+ # Encoding object.
+ # If no XMLDecl has been set, returns the default encoding.
+ def encoding
+ xml_decl().encoding
+ end
+
+ # @return the XMLDecl standalone value of this document as a String.
+ # If no XMLDecl has been set, returns the default setting.
+ def stand_alone?
+ xml_decl().stand_alone?
+ end
+
+ # :call-seq:
+ # doc.write(output=$stdout, indent=-1, transitive=false, ie_hack=false, encoding=nil)
+ # doc.write(options={:output => $stdout, :indent => -1, :transitive => false, :ie_hack => false, :encoding => nil})
+ #
+ # Write the XML tree out, optionally with indent. This writes out the
+ # entire XML document, including XML declarations, doctype declarations,
+ # and processing instructions (if any are given).
+ #
+ # A controversial point is whether Document should always write the XML
+ # declaration (<?xml version='1.0'?>) whether or not one is given by the
+ # user (or source document). REXML does not write one if one was not
+ # specified, because it adds unnecessary bandwidth to applications such
+ # as XML-RPC.
+ #
+ # Accept Nth argument style and options Hash style as argument.
+ # The recommended style is options Hash style for one or more
+ # arguments case.
+ #
+ # _Examples_
+ # Document.new("<a><b/></a>").write
+ #
+ # output = ""
+ # Document.new("<a><b/></a>").write(output)
+ #
+ # output = ""
+ # Document.new("<a><b/></a>").write(:output => output, :indent => 2)
+ #
+ # See also the classes in the rexml/formatters package for the proper way
+ # to change the default formatting of XML output.
+ #
+ # _Examples_
+ #
+ # output = ""
+ # tr = Transitive.new
+ # tr.write(Document.new("<a><b/></a>"), output)
+ #
+ # output::
+ # output an object which supports '<< string'; this is where the
+ # document will be written.
+ # indent::
+ # An integer. If -1, no indenting will be used; otherwise, the
+ # indentation will be twice this number of spaces, and children will be
+ # indented an additional amount. For a value of 3, every item will be
+ # indented 3 more levels, or 6 more spaces (2 * 3). Defaults to -1
+ # transitive::
+ # If transitive is true and indent is >= 0, then the output will be
+ # pretty-printed in such a way that the added whitespace does not affect
+ # the absolute *value* of the document -- that is, it leaves the value
+ # and number of Text nodes in the document unchanged.
+ # ie_hack::
+ # This hack inserts a space before the /> on empty tags to address
+ # a limitation of Internet Explorer. Defaults to false
+ # encoding::
+ # Encoding name as String. Change output encoding to specified encoding
+ # instead of encoding in XML declaration.
+ # Defaults to nil. It means encoding in XML declaration is used.
+ def write(*arguments)
+ if arguments.size == 1 and arguments[0].class == Hash
+ options = arguments[0]
+
+ output = options[:output]
+ indent = options[:indent]
+ transitive = options[:transitive]
+ ie_hack = options[:ie_hack]
+ encoding = options[:encoding]
+ else
+ output, indent, transitive, ie_hack, encoding, = *arguments
+ end
+
+ output ||= $stdout
+ indent ||= -1
+ transitive = false if transitive.nil?
+ ie_hack = false if ie_hack.nil?
+ encoding ||= xml_decl.encoding
+
+ if encoding != 'UTF-8' && !output.kind_of?(Output)
+ output = Output.new( output, encoding )
+ end
+ formatter = if indent > -1
+ if transitive
+ require "rexml/formatters/transitive"
+ REXML::Formatters::Transitive.new( indent, ie_hack )
+ else
+ REXML::Formatters::Pretty.new( indent, ie_hack )
+ end
+ else
+ REXML::Formatters::Default.new( ie_hack )
+ end
+ formatter.write( self, output )
+ end
+
+
+ def Document::parse_stream( source, listener )
+ Parsers::StreamParser.new( source, listener ).parse
+ end
+
+ # Set the entity expansion limit. By default the limit is set to 10000.
+ #
+ # Deprecated. Use REXML::Security.entity_expansion_limit= instead.
+ def Document::entity_expansion_limit=( val )
+ Security.entity_expansion_limit = val
+ end
+
+ # Get the entity expansion limit. By default the limit is set to 10000.
+ #
+ # Deprecated. Use REXML::Security.entity_expansion_limit instead.
+ def Document::entity_expansion_limit
+ return Security.entity_expansion_limit
+ end
+
+ # Set the entity expansion text limit. By default the limit is set to 10240.
+ #
+ # Deprecated. Use REXML::Security.entity_expansion_text_limit= instead.
+ def Document::entity_expansion_text_limit=( val )
+ Security.entity_expansion_text_limit = val
+ end
+
+ # Get the entity expansion text limit. By default the limit is set to 10240.
+ #
+ # Deprecated. Use REXML::Security.entity_expansion_text_limit instead.
+ def Document::entity_expansion_text_limit
+ return Security.entity_expansion_text_limit
+ end
+
+ attr_reader :entity_expansion_count
+
+ def record_entity_expansion
+ @entity_expansion_count += 1
+ if @entity_expansion_count > Security.entity_expansion_limit
+ raise "number of entity expansions exceeded, processing aborted."
+ end
+ end
+
+ def document
+ self
+ end
+
+ private
+ def build( source )
+ Parsers::TreeParser.new( source, self ).parse
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/dtd/attlistdecl.rb b/jni/ruby/lib/rexml/dtd/attlistdecl.rb
new file mode 100644
index 0000000..25955ee
--- /dev/null
+++ b/jni/ruby/lib/rexml/dtd/attlistdecl.rb
@@ -0,0 +1,10 @@
+require "rexml/child"
+module REXML
+ module DTD
+ # Child subclass for <!ATTLIST ...> declarations. It defines only the
+ # patterns below and adds no behaviour of its own.
+ class AttlistDecl < Child
+ # Literal text that opens an attribute-list declaration.
+ START = "<!ATTLIST"
+ # START at the beginning of a line, allowing leading whitespace.
+ START_RE = /^\s*#{START}/um
+ # Capture 1 is one whole declaration: from START to the nearest following ">".
+ PATTERN_RE = /\s*(#{START}.*?>)/um
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/dtd/dtd.rb b/jni/ruby/lib/rexml/dtd/dtd.rb
new file mode 100644
index 0000000..62317ba
--- /dev/null
+++ b/jni/ruby/lib/rexml/dtd/dtd.rb
@@ -0,0 +1,46 @@
+require "rexml/dtd/elementdecl"
+require "rexml/dtd/entitydecl"
+require "rexml/comment"
+require "rexml/dtd/notationdecl"
+require "rexml/dtd/attlistdecl"
+require "rexml/parent"
+
+module REXML
+ module DTD
+ # Entry points for turning DTD text into a Parent holding declaration nodes.
+ class Parser
+ # Dispatches on the type of +input+: a String is parsed directly, a File
+ # is read in full first. Any other type falls through the case and the
+ # method returns nil.
+ def Parser.parse( input )
+ case input
+ when String
+ parse_helper input
+ when File
+ parse_helper input.read
+ end
+ end
+
+ # Takes a String and parses it out
+ #
+ # NOTE(review): nothing inside the loop shortens +input+, so the
+ # `input.size > 0` condition cannot change between iterations.
+ # NOTE(review): `X.PATTERN_RE` is method-call syntax, not a constant
+ # lookup (that would be `X::PATTERN_RE`) -- confirm what was intended.
+ # NOTE(review): the EntityDecl and NotationDecl classes in this patch
+ # define PUBLIC/SYSTEM/... patterns but no PATTERN_RE.
+ def Parser.parse_helper( input )
+ contents = Parent.new
+ while input.size > 0
+ case input
+ when ElementDecl.PATTERN_RE
+ # $& is the matched String; the branches below pass the MatchData ($~).
+ match = $&
+ contents << ElementDecl.new( match )
+ when AttlistDecl.PATTERN_RE
+ matchdata = $~
+ contents << AttlistDecl.new( matchdata )
+ when EntityDecl.PATTERN_RE
+ matchdata = $~
+ contents << EntityDecl.new( matchdata )
+ when Comment.PATTERN_RE
+ matchdata = $~
+ contents << Comment.new( matchdata )
+ when NotationDecl.PATTERN_RE
+ matchdata = $~
+ contents << NotationDecl.new( matchdata )
+ end
+ end
+ contents
+ end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/dtd/elementdecl.rb b/jni/ruby/lib/rexml/dtd/elementdecl.rb
new file mode 100644
index 0000000..f90b27d
--- /dev/null
+++ b/jni/ruby/lib/rexml/dtd/elementdecl.rb
@@ -0,0 +1,17 @@
+require "rexml/child"
+module REXML
+ module DTD
+ class ElementDecl < Child
+ START = "<!ELEMENT"
+ START_RE = /^\s*#{START}/um
+ # PATTERN_RE = /^\s*(#{START}.*?)>/um
+ PATTERN_RE = /^\s*#{START}\s+((?:[:\w][-\.\w]*:)?[-!\*\.\w]*)(.*?)>/
+ #\s*((((["']).*?\5)|[^\/'">]*)*?)(\/)?>/um, true)
+
+ def initialize match
+ @name = match[1]
+ @rest = match[2]
+ end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/dtd/entitydecl.rb b/jni/ruby/lib/rexml/dtd/entitydecl.rb
new file mode 100644
index 0000000..a9286b2
--- /dev/null
+++ b/jni/ruby/lib/rexml/dtd/entitydecl.rb
@@ -0,0 +1,56 @@
+require "rexml/child"
+module REXML
+ module DTD
+ class EntityDecl < Child
+ START = "<!ENTITY"
+ START_RE = /^\s*#{START}/um
+ PUBLIC = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+PUBLIC\s+((["']).*?\3)\s+((["']).*?\5)\s*>/um
+ SYSTEM = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+SYSTEM\s+((["']).*?\3)(?:\s+NDATA\s+\w+)?\s*>/um
+ PLAIN = /^\s*#{START}\s+(\w+)\s+((["']).*?\3)\s*>/um
+ PERCENT = /^\s*#{START}\s+%\s+(\w+)\s+((["']).*?\3)\s*>/um
+ # <!ENTITY name SYSTEM "...">
+ # <!ENTITY name "...">
+ def initialize src
+ super()
+ md = nil
+ if src.match( PUBLIC )
+ md = src.match( PUBLIC, true )
+ @middle = "PUBLIC"
+ @content = "#{md[2]} #{md[4]}"
+ elsif src.match( SYSTEM )
+ md = src.match( SYSTEM, true )
+ @middle = "SYSTEM"
+ @content = md[2]
+ elsif src.match( PLAIN )
+ md = src.match( PLAIN, true )
+ @middle = ""
+ @content = md[2]
+ elsif src.match( PERCENT )
+ md = src.match( PERCENT, true )
+ @middle = ""
+ @content = md[2]
+ end
+ raise ParseException.new("failed Entity match", src) if md.nil?
+ @name = md[1]
+ end
+
+ def to_s
+ rv = "<!ENTITY #@name "
+ rv << "#@middle " if @middle.size > 0
+ rv << @content
+ rv
+ end
+
+ def write( output, indent )
+ indent( output, indent )
+ output << to_s
+ end
+
+ def EntityDecl.parse_source source, listener
+ md = source.match( PATTERN_RE, true )
+ thing = md[0].squeeze(" \t\n\r")
+ listener.send inspect.downcase, thing
+ end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/dtd/notationdecl.rb b/jni/ruby/lib/rexml/dtd/notationdecl.rb
new file mode 100644
index 0000000..17d1b9e
--- /dev/null
+++ b/jni/ruby/lib/rexml/dtd/notationdecl.rb
@@ -0,0 +1,39 @@
+require "rexml/child"
+module REXML
+ module DTD
+ # Represents a <!NOTATION ...> declaration read from a DTD source.
+ class NotationDecl < Child
+   START = "<!NOTATION"
+   START_RE = /^\s*#{START}/um
+   # Group 1 is the notation name, group 2 the keyword, group 3 the quoted literal.
+   PUBLIC = /^\s*#{START}\s+(\w[\w-]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
+   SYSTEM = /^\s*#{START}\s+(\w[\w-]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
+
+   # Tries PUBLIC first, then SYSTEM, and re-matches with the winning
+   # pattern using the two-argument form of +match+.
+   # Raises ParseException when neither pattern matches.
+   def initialize src
+     super()
+     pattern = [ PUBLIC, SYSTEM ].find { |candidate| src.match( candidate ) }
+     if pattern.nil?
+       raise ParseException.new( "error parsing notation: no matching pattern", src )
+     end
+     md = src.match( pattern, true )
+     @name, @middle, @rest = md[1], md[2], md[3]
+   end
+
+   # Renders the declaration as text.
+   def to_s
+     "<!NOTATION #@name #@middle #@rest>"
+   end
+
+   # Writes the declaration to +output+ after calling the inherited
+   # indent helper with the same +indent+ value.
+   def write( output, indent )
+     indent( output, indent )
+     output << to_s
+   end
+
+   # NOTE(review): PATTERN_RE is not defined in this class; it would have
+   # to come from an enclosing or inherited scope -- verify.
+   def NotationDecl.parse_source source, listener
+     matched = source.match( PATTERN_RE, true )
+     thing = matched[0].squeeze(" \t\n\r")
+     listener.send( inspect.downcase, thing )
+   end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/element.rb b/jni/ruby/lib/rexml/element.rb
new file mode 100644
index 0000000..e459704
--- /dev/null
+++ b/jni/ruby/lib/rexml/element.rb
@@ -0,0 +1,1240 @@
+require "rexml/parent"
+require "rexml/namespace"
+require "rexml/attribute"
+require "rexml/cdata"
+require "rexml/xpath"
+require "rexml/parseexception"
+
+module REXML
+ # An implementation note about namespaces:
+ # As we parse, when we find namespaces we put them in a hash and assign
+ # them a unique ID. We then convert the namespace prefix for the node
+ # to the unique ID. This makes namespace lookup much faster for the
+ # cost of extra memory use. We save the namespace prefix for the
+ # context node and convert it back when we write it.
+ @@namespaces = {}
+
+ # Represents a tagged XML element. Elements are characterized by
+ # having children, attributes, and names, and can themselves be
+ # children.
+ class Element < Parent
+ include Namespace
+
+ UNDEFINED = "UNDEFINED"; # The default name
+
+ # Mechanisms for accessing attributes and child elements of this
+ # element.
+ attr_reader :attributes, :elements
+ # The context holds information about the processing environment, such as
+ # whitespace handling.
+ attr_accessor :context
+
+ # Constructor
+ # arg::
+ # if not supplied, will be set to the default value.
+ # If a String, the name of this object will be set to the argument.
+ # If an Element, the object will be shallowly cloned; name,
+ # attributes, and namespaces will be copied. Children will +not+ be
+ # copied.
+ # parent::
+ # if supplied, must be a Parent, and will be used as
+ # the parent of this object.
+ # context::
+ # If supplied, must be a hash containing context items. Context items
+ # include:
+ # * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of
+ # strings being the names of the elements to respect
+ # whitespace for. Defaults to :+all+.
+ # * <tt>:compress_whitespace</tt> the value can be :+all+ or an array of
+ # strings being the names of the elements to ignore whitespace on.
+ # Overrides :+respect_whitespace+.
+ # * <tt>:ignore_whitespace_nodes</tt> the value can be :+all+ or an array
+ # of strings being the names of the elements in which to ignore
+ # whitespace-only nodes. If this is set, Text nodes which contain only
+ # whitespace will not be added to the document tree.
+ # * <tt>:raw</tt> can be :+all+, or an array of strings being the names of
+ # the elements to process in raw mode. In raw mode, special
+ # characters in text are not converted to or from entities.
+ def initialize( arg = UNDEFINED, parent=nil, context=nil )
+ super(parent)
+
+ @elements = Elements.new(self)
+ @attributes = Attributes.new(self)
+ @context = context
+
+ if arg.kind_of? String
+ self.name = arg
+ elsif arg.kind_of? Element
+ self.name = arg.expanded_name
+ arg.attributes.each_attribute{ |attribute|
+ @attributes << Attribute.new( attribute )
+ }
+ @context = arg.context
+ end
+ end
+
+ def inspect
+ rv = "<#@expanded_name"
+
+ @attributes.each_attribute do |attr|
+ rv << " "
+ attr.write( rv, 0 )
+ end
+
+ if children.size > 0
+ rv << "> ... </>"
+ else
+ rv << "/>"
+ end
+ end
+
+
+ # Creates a shallow copy of self.
+ # d = Document.new "<a><b/><b/><c><d/></c></a>"
+ # new_a = d.root.clone
+ # puts new_a # => "<a/>"
+ def clone
+ self.class.new self
+ end
+
+ # Evaluates to the root node of the document that this element
+ # belongs to. If this element doesn't belong to a document, but does
+ # belong to another Element, the parent's root will be returned, until the
+ # earliest ancestor is found.
+ #
+ # Note that this is not the same as the document element.
+ # In the following example, <a> is the document element, and the root
+ # node is the parent node of the document element. You may ask yourself
+ # why the root node is useful: consider the doctype and XML declaration,
+ # and any processing instructions before the document element... they
+ # are children of the root node, or siblings of the document element.
+ # The only time this isn't true is when an Element is created that is
+ # not part of any Document. In this case, the ancestor that has no
+ # parent acts as the root node.
+ # d = Document.new '<a><b><c/></b></a>'
+ # a = d[1] ; c = a[1][1]
+ # d.root_node == d # TRUE
+ # a.root_node # namely, d
+ # c.root_node # again, d
+ def root_node
+ parent.nil? ? self : parent.root_node
+ end
+
+ def root
+ return elements[1] if self.kind_of? Document
+ return self if parent.kind_of? Document or parent.nil?
+ return parent.root
+ end
+
+ # Evaluates to the document to which this element belongs, or nil if this
+ # element doesn't belong to a document.
+ def document
+ rt = root
+ rt.parent if rt
+ end
+
+ # Evaluates to +true+ if whitespace is respected for this element. This
+ # is the case if:
+ # 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
+ # 2. The context has :+respect_whitespace+ set to :+all+ or
+ # an array containing the name of this element, and
+ # :+compress_whitespace+ isn't set to :+all+ or an array containing the
+ # name of this element.
+ # The evaluation is tested against +expanded_name+, and so is namespace
+ # sensitive.
+ def whitespace
+ @whitespace = nil
+ if @context
+ if @context[:respect_whitespace]
+ @whitespace = (@context[:respect_whitespace] == :all or
+ @context[:respect_whitespace].include? expanded_name)
+ end
+ @whitespace = false if (@context[:compress_whitespace] and
+ (@context[:compress_whitespace] == :all or
+ @context[:compress_whitespace].include? expanded_name)
+ )
+ end
+ @whitespace = true unless @whitespace == false
+ @whitespace
+ end
+
+ def ignore_whitespace_nodes
+ @ignore_whitespace_nodes = false
+ if @context
+ if @context[:ignore_whitespace_nodes]
+ @ignore_whitespace_nodes =
+ (@context[:ignore_whitespace_nodes] == :all or
+ @context[:ignore_whitespace_nodes].include? expanded_name)
+ end
+ end
+ end
+
+ # Evaluates to +true+ if raw mode is set for this element. This
+ # is the case if the context has :+raw+ set to :+all+ or
+ # an array containing the name of this element.
+ #
+ # The evaluation is tested against +expanded_name+, and so is namespace
+ # sensitive.
+ def raw
+ @raw = (@context and @context[:raw] and
+ (@context[:raw] == :all or
+ @context[:raw].include? expanded_name))
+ @raw
+ end
+
+ #once :whitespace, :raw, :ignore_whitespace_nodes
+
+ #################################################
+ # Namespaces #
+ #################################################
+
+ # Evaluates to an +Array+ containing the prefixes (names) of all defined
+ # namespaces at this context node.
+ # doc = Document.new("<a xmlns:x='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
+ # doc.elements['//b'].prefixes # -> ['x', 'y']
+ def prefixes
+ prefixes = []
+ prefixes = parent.prefixes if parent
+ prefixes |= attributes.prefixes
+ return prefixes
+ end
+
+ def namespaces
+ namespaces = {}
+ namespaces = parent.namespaces if parent
+ namespaces = namespaces.merge( attributes.namespaces )
+ return namespaces
+ end
+
+ # Evaluates to the URI for a prefix, or the empty string if no such
+ # namespace is declared for this element. Evaluates recursively for
+ # ancestors. Returns the default namespace, if there is one.
+ # prefix::
+ # the prefix to search for. If not supplied, returns the default
+ # namespace if one exists
+ # Returns::
+ # the namespace URI as a String, or nil if no such namespace
+ # exists. If the namespace is undefined, returns an empty string
+ # doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
+ # b = doc.elements['//b']
+ # b.namespace # -> '1'
+ # b.namespace("y") # -> '2'
+ def namespace(prefix=nil)
+ if prefix.nil?
+ prefix = prefix()
+ end
+ if prefix == ''
+ prefix = "xmlns"
+ else
+ prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
+ end
+ ns = attributes[ prefix ]
+ ns = parent.namespace(prefix) if ns.nil? and parent
+ ns = '' if ns.nil? and prefix == 'xmlns'
+ return ns
+ end
+
+ # Adds a namespace to this element.
+ # prefix::
+ # the prefix string, or the namespace URI if +uri+ is not
+ # supplied
+ # uri::
+ # the namespace URI. May be nil, in which case +prefix+ is used as
+ # the URI
+ # Evaluates to: this Element
+ # a = Element.new("a")
+ # a.add_namespace("xmlns:foo", "bar" )
+ # a.add_namespace("foo", "bar") # shorthand for previous line
+ # a.add_namespace("twiddle")
+ # puts a #-> <a xmlns:foo='bar' xmlns='twiddle'/>
+ def add_namespace( prefix, uri=nil )
+ unless uri
+ @attributes["xmlns"] = prefix
+ else
+ prefix = "xmlns:#{prefix}" unless prefix =~ /^xmlns:/
+ @attributes[ prefix ] = uri
+ end
+ self
+ end
+
+ # Removes a namespace from this node. This only works if the namespace is
+ # actually declared in this node. If no argument is passed, deletes the
+ # default namespace.
+ #
+ # Evaluates to: this element
+ # doc = Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>"
+ # doc.root.delete_namespace
+ # puts doc # -> <a xmlns:foo='bar'/>
+ # doc.root.delete_namespace 'foo'
+ # puts doc # -> <a/>
+ def delete_namespace namespace="xmlns"
+ namespace = "xmlns:#{namespace}" unless namespace == 'xmlns'
+ attribute = attributes.get_attribute(namespace)
+ attribute.remove unless attribute.nil?
+ self
+ end
+
+ #################################################
+ # Elements #
+ #################################################
+
+ # Adds a child to this element, optionally setting attributes in
+ # the element.
+ # element::
+ # optional. If Element, the element is added.
+ # Otherwise, a new Element is constructed with the argument (see
+ # Element.initialize).
+ # attrs::
+ # If supplied, must be a Hash containing String name,value
+ # pairs, which will be used to set the attributes of the new Element.
+ # Returns:: the Element that was added
+ # el = doc.add_element 'my-tag'
+ # el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'}
+ # el = Element.new 'my-tag'
+ # doc.add_element el
+ def add_element element, attrs=nil
+ raise "First argument must be either an element name, or an Element object" if element.nil?
+ el = @elements.add(element)
+ attrs.each do |key, value|
+ el.attributes[key]=value
+ end if attrs.kind_of? Hash
+ el
+ end
+
+ # Deletes a child element.
+ # element::
+ # Must be an +Element+, +String+, or +Integer+. If Element,
+ # the element is removed. If String, the element is found (via XPath)
+ # and removed. <em>This means that any parent can remove any
+ # descendant.</em> If Integer, the Element indexed by that number will be
+ # removed.
+ # Returns:: the element that was removed.
+ # doc.delete_element "/a/b/c[@id='4']"
+ # doc.delete_element doc.elements["//k"]
+ # doc.delete_element 1
+ def delete_element element
+ @elements.delete element
+ end
+
+ # Evaluates to +true+ if this element has at least one child Element
+ # doc = Document.new "<a><b/><c>Text</c></a>"
+ # doc.root.has_elements? # -> true
+ # doc.elements["/a/b"].has_elements? # -> false
+ # doc.elements["/a/c"].has_elements? # -> false
+ def has_elements?
+ !@elements.empty?
+ end
+
+ # Iterates through the child elements, yielding for each Element that
+ # has a particular attribute set.
+ # key::
+ # the name of the attribute to search for
+ # value::
+ # the value of the attribute
+ # max::
+ # (optional) causes this method to return after yielding
+ # for this number of matching children
+ # name::
+ # (optional) if supplied, this is an XPath that filters
+ # the children to check.
+ #
+ # doc = Document.new "<a><b id='1'/><c id='2'/><d id='1'/><e/></a>"
+ # # Yields b, c, d
+ # doc.root.each_element_with_attribute( 'id' ) {|e| p e}
+ # # Yields b, d
+ # doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e}
+ # # Yields b
+ # doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e}
+ # # Yields d
+ # doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e}
+ def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element
+ each_with_something( proc {|child|
+ if value.nil?
+ child.attributes[key] != nil
+ else
+ child.attributes[key]==value
+ end
+ }, max, name, &block )
+ end
+
+ # Iterates through the children, yielding for each Element that
+ # has a particular text set.
+ # text::
+ # the text to search for. If nil, or not supplied, will iterate
+ # over all +Element+ children that contain at least one +Text+ node.
+ # max::
+ # (optional) causes this method to return after yielding
+ # for this number of matching children
+ # name::
+ # (optional) if supplied, this is an XPath that filters
+ # the children to check.
+ #
+ # doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
+ # # Yields b, c, d
+ # doc.each_element_with_text {|e|p e}
+ # # Yields b, c
+ # doc.each_element_with_text('b'){|e|p e}
+ # # Yields b
+ # doc.each_element_with_text('b', 1){|e|p e}
+ # # Yields d
+ # doc.each_element_with_text(nil, 0, 'd'){|e|p e}
+ def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element
+ each_with_something( proc {|child|
+ if text.nil?
+ child.has_text?
+ else
+ child.text == text
+ end
+ }, max, name, &block )
+ end
+
+ # Synonym for Element.elements.each
+ def each_element( xpath=nil, &block ) # :yields: Element
+ @elements.each( xpath, &block )
+ end
+
+ # Synonym for Element.to_a
+ # This is a little slower than calling elements.each directly.
+ # xpath:: any XPath by which to search for elements in the tree
+ # Returns:: an array of Elements that match the supplied path
+ def get_elements( xpath )
+ @elements.to_a( xpath )
+ end
+
+ # Returns the next sibling that is an element, or nil if there is
+ # no Element sibling after this one
+ # doc = Document.new '<a><b/>text<c/></a>'
+ # doc.root.elements['b'].next_element #-> <c/>
+ # doc.root.elements['c'].next_element #-> nil
+ def next_element
+ element = next_sibling
+ element = element.next_sibling until element.nil? or element.kind_of? Element
+ return element
+ end
+
+ # Returns the previous sibling that is an element, or nil if there is
+ # no Element sibling prior to this one
+ # doc = Document.new '<a><b/>text<c/></a>'
+ # doc.root.elements['c'].previous_element #-> <b/>
+ # doc.root.elements['b'].previous_element #-> nil
+ def previous_element
+ element = previous_sibling
+ element = element.previous_sibling until element.nil? or element.kind_of? Element
+ return element
+ end
+
+
+ #################################################
+ # Text #
+ #################################################
+
+ # Evaluates to +true+ if this element has at least one Text child
+ def has_text?
+ not text().nil?
+ end
+
+ # A convenience method which returns the String value of the _first_
+ # child text element, if one exists, and +nil+ otherwise.
+ #
+ # <em>Note that an element may have multiple Text elements, perhaps
+ # separated by other children</em>. Be aware that this method only returns
+ # the first Text node.
+ #
+ # This method returns the +value+ of the first text child node, which
+ # ignores the +raw+ setting, so always returns normalized text. See
+ # the Text::value documentation.
+ #
+ # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
+ # # The element 'p' has two text elements, "some text " and " more text".
+ # doc.root.text #-> "some text "
+ def text( path = nil )
+ rv = get_text(path)
+ return rv.value unless rv.nil?
+ nil
+ end
+
+ # Returns the first child Text node, if any, or +nil+ otherwise.
+ # This method returns the actual +Text+ node, rather than the String content.
+ # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
+ # # The element 'p' has two text elements, "some text " and " more text".
+ # doc.root.get_text.value #-> "some text "
+ def get_text path = nil
+ rv = nil
+ if path
+ element = @elements[ path ]
+ rv = element.get_text unless element.nil?
+ else
+ rv = @children.find { |node| node.kind_of? Text }
+ end
+ return rv
+ end
+
+ # Sets the first Text child of this object. See text() for a
+ # discussion about Text children.
+ #
+ # If a Text child already exists, the child is replaced by this
+ # content. This means that Text content can be deleted by calling
+ # this method with a nil argument. In this case, the next Text
+ # child becomes the first Text child. In no case is the order of
+ # any siblings disturbed.
+ # text::
+ # If a String, a new Text child is created and added to
+ # this Element as the first Text child. If Text, the text is set
+ # as the first Child element. If nil, then any existing first Text
+ # child is removed.
+ # Returns:: this Element.
+ # doc = Document.new '<a><b/></a>'
+ # doc.root.text = 'Sean' #-> '<a><b/>Sean</a>'
+ # doc.root.text = 'Elliott' #-> '<a><b/>Elliott</a>'
+ # doc.root.add_element 'c' #-> '<a><b/>Elliott<c/></a>'
+ # doc.root.text = 'Russell' #-> '<a><b/>Russell<c/></a>'
+ # doc.root.text = nil #-> '<a><b/><c/></a>'
+ def text=( text )
+ if text.kind_of? String
+ text = Text.new( text, whitespace(), nil, raw() )
+ elsif !text.nil? and !text.kind_of? Text
+ text = Text.new( text.to_s, whitespace(), nil, raw() )
+ end
+ old_text = get_text
+ if text.nil?
+ old_text.remove unless old_text.nil?
+ else
+ if old_text.nil?
+ self << text
+ else
+ old_text.replace_with( text )
+ end
+ end
+ return self
+ end
+
+ # A helper method to add a Text child. Actual Text instances can
+ # be added with regular Parent methods, such as add() and <<()
+ # text::
+ # if a String, a new Text instance is created and added
+ # to the parent. If Text, the object is added directly.
+ # Returns:: this Element
+ # e = Element.new('a') #-> <e/>
+ # e.add_text 'foo' #-> <e>foo</e>
+ # e.add_text Text.new(' bar') #-> <e>foo bar</e>
+ # Note that at the end of this example, the branch has <b>3</b> nodes; the 'e'
+ # element and <b>2</b> Text node children.
+ def add_text( text )
+ if text.kind_of? String
+ if @children[-1].kind_of? Text
+ @children[-1] << text
+ return
+ end
+ text = Text.new( text, whitespace(), nil, raw() )
+ end
+ self << text unless text.nil?
+ return self
+ end
+
+ # Returns the Symbol :element.
+ def node_type
+ :element
+ end
+
+ def xpath
+ path_elements = []
+ cur = self
+ path_elements << __to_xpath_helper( self )
+ while cur.parent
+ cur = cur.parent
+ path_elements << __to_xpath_helper( cur )
+ end
+ return path_elements.reverse.join( "/" )
+ end
+
+ #################################################
+ # Attributes #
+ #################################################
+
+ def attribute( name, namespace=nil )
+ prefix = nil
+ if namespaces.respond_to? :key
+ prefix = namespaces.key(namespace) if namespace
+ else
+ prefix = namespaces.index(namespace) if namespace
+ end
+ prefix = nil if prefix == 'xmlns'
+
+ ret_val =
+ attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
+
+ return ret_val unless ret_val.nil?
+ return nil if prefix.nil?
+
+ # now check that prefix'es namespace is not the same as the
+ # default namespace
+ return nil unless ( namespaces[ prefix ] == namespaces[ 'xmlns' ] )
+
+ attributes.get_attribute( name )
+
+ end
+
+ # Evaluates to +true+ if this element has any attributes set, false
+ # otherwise.
+ def has_attributes?
+ return !@attributes.empty?
+ end
+
+ # Adds an attribute to this element, overwriting any existing attribute
+ # by the same name.
+ # key::
+ # can be either an Attribute or a String. If an Attribute,
+ # the attribute is added to the list of Element attributes. If String,
+ # the argument is used as the name of the new attribute, and the value
+ # parameter must be supplied.
+ # value::
+ # Required if +key+ is a String, and ignored if the first argument is
+ # an Attribute. This is a String, and is used as the value
+ # of the new Attribute. This should be the unnormalized value of the
+ # attribute (without entities).
+ # Returns:: the Attribute added
+ # e = Element.new 'e'
+ # e.add_attribute( 'a', 'b' ) #-> <e a='b'/>
+ # e.add_attribute( 'x:a', 'c' ) #-> <e a='b' x:a='c'/>
+ # e.add_attribute Attribute.new('b', 'd') #-> <e a='b' x:a='c' b='d'/>
+ def add_attribute( key, value=nil )
+ if key.kind_of? Attribute
+ @attributes << key
+ else
+ @attributes[key] = value
+ end
+ end
+
+ # Add multiple attributes to this element.
+ # hash:: is either a hash, or array of arrays
+ # el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} )
+ # el.add_attributes( [ ["name1","value1"], ["name2","value2"] ] )
+ def add_attributes hash
+ if hash.kind_of? Hash
+ hash.each_pair {|key, value| @attributes[key] = value }
+ elsif hash.kind_of? Array
+ hash.each { |value| @attributes[ value[0] ] = value[1] }
+ end
+ end
+
+ # Removes an attribute
+ # key::
+ # either an Attribute or a String. In either case, the
+ # attribute is found by matching the attribute name to the argument,
+ # and then removed. If no attribute is found, no action is taken.
+ # Returns::
+ # the attribute removed, or nil if this Element did not contain
+ # a matching attribute
+ # e = Element.new('E')
+ # e.add_attribute( 'name', 'Sean' ) #-> <E name='Sean'/>
+ # r = e.add_attribute( 'sur:name', 'Russell' ) #-> <E name='Sean' sur:name='Russell'/>
+ # e.delete_attribute( 'name' ) #-> <E sur:name='Russell'/>
+ # e.delete_attribute( r ) #-> <E/>
+ def delete_attribute(key)
+ attr = @attributes.get_attribute(key)
+ attr.remove unless attr.nil?
+ end
+
+ #################################################
+ # Other Utilities #
+ #################################################
+
+ # Get an array of all CData children.
+ # IMMUTABLE
+ def cdatas
+ find_all { |child| child.kind_of? CData }.freeze
+ end
+
+ # Get an array of all Comment children.
+ # IMMUTABLE
+ def comments
+ find_all { |child| child.kind_of? Comment }.freeze
+ end
+
+ # Get an array of all Instruction children.
+ # IMMUTABLE
+ def instructions
+ find_all { |child| child.kind_of? Instruction }.freeze
+ end
+
+ # Get an array of all Text children.
+ # IMMUTABLE
+ def texts
+ find_all { |child| child.kind_of? Text }.freeze
+ end
+
+ # == DEPRECATED
+ # See REXML::Formatters
+ #
+ # Writes out this element, and recursively, all children.
+ # output::
+ # output an object which supports '<< string'; this is where the
+ # document will be written.
+ # indent::
+ # An integer. If -1, no indenting will be used; otherwise, the
+ # indentation will be this number of spaces, and children will be
+ # indented an additional amount. Defaults to -1
+ # transitive::
+ # If transitive is true and indent is >= 0, then the output will be
+ # pretty-printed in such a way that the added whitespace does not affect
+ # the parse tree of the document
+ # ie_hack::
+ # This hack inserts a space before the /> on empty tags to address
+ # a limitation of Internet Explorer. Defaults to false
+ #
+ # out = ''
+ # doc.write( out ) #-> doc is written to the string 'out'
+ # doc.write( $stdout ) #-> doc written to the console
+ def write(output=$stdout, indent=-1, transitive=false, ie_hack=false)
+ Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
+ formatter = if indent > -1
+ if transitive
+ require "rexml/formatters/transitive"
+ REXML::Formatters::Transitive.new( indent, ie_hack )
+ else
+ REXML::Formatters::Pretty.new( indent, ie_hack )
+ end
+ else
+ REXML::Formatters::Default.new( ie_hack )
+ end
+ formatter.write( self, output )
+ end
+
+
+ private
+ def __to_xpath_helper node
+ rv = node.expanded_name.clone
+ if node.parent
+ results = node.parent.find_all {|n|
+ n.kind_of?(REXML::Element) and n.expanded_name == node.expanded_name
+ }
+ if results.length > 1
+ idx = results.index( node )
+ rv << "[#{idx+1}]"
+ end
+ end
+ rv
+ end
+
+ # A private helper method
+ def each_with_something( test, max=0, name=nil )
+ num = 0
+ @elements.each( name ){ |child|
+ yield child if test.call(child) and num += 1
+ return if max>0 and num == max
+ }
+ end
+ end
+
+ ########################################################################
+ # ELEMENTS #
+ ########################################################################
+
+ # A class which provides filtering of children for Elements, and
+ # XPath search support. You are expected to only encounter this class as
+ # the <tt>element.elements</tt> object. Therefore, you are
+ # _not_ expected to instantiate this yourself.
+ class Elements
+ include Enumerable
+ # Constructor
+ # parent:: the parent Element
+ def initialize parent
+ # Keep the owning element; the other methods here operate on it via @element.
+ @element = parent
+ end
+
+ # Fetches a child element. Filters only Element children, regardless of
+ # the XPath match.
+ # index::
+ # the search parameter. This is either an Integer, which
+ # will be used to find the index'th child Element, or an XPath,
+ # which will be used to search for the Element. <em>Because
+ # of the nature of XPath searches, any element in the connected XML
+ # document can be fetched through any other element.</em> <b>The
+ # Integer index is 1-based, not 0-based.</b> This means that the first
+ # child element is at index 1, not 0, and the +n+th element is at index
+ # +n+, not <tt>n-1</tt>. This is because XPath indexes element children
+ # starting from 1, not 0, and the indexes should be the same.
+ # name::
+ # optional, and only used if the first argument is an
+ # Integer. In that case, the index'th child Element that has the
+ # supplied name will be returned. Note again that the indexes start at 1.
+ # Returns:: the first matching Element, or nil if no child matched
+ # doc = Document.new '<a><b/><c id="1"/><c id="2"/><d/></a>'
+ # doc.root.elements[1] #-> <b/>
+ # doc.root.elements['c'] #-> <c id="1"/>
+ # doc.root.elements[2,'c'] #-> <c id="2"/>
+ def []( index, name=nil)
+ if index.kind_of? Integer
+ raise "index (#{index}) must be >= 1" if index < 1
+ name = literalize(name) if name
+ num = 0
+ @element.find { |child|
+ child.kind_of? Element and
+ (name.nil? ? true : child.has_name?( name )) and
+ (num += 1) == index
+ }
+ else
+ return XPath::first( @element, index )
+ #{ |element|
+ # return element if element.kind_of? Element
+ #}
+ #return nil
+ end
+ end
+
+ # Sets an element, replacing any previous matching element. If no
+ # existing element is found, the element is added.
+ # index:: Used to find a matching element to replace. See []().
+ # element::
+ # The element to replace the existing element with
+ # Returns:: the previous element, or nil if no previous element was found.
+ #
+ # doc = Document.new '<a/>'
+ # doc.root.elements[10] = Element.new('b') #-> <a><b/></a>
+ # doc.root.elements[1] #-> <b/>
+ # doc.root.elements[1] = Element.new('c') #-> <a><c/></a>
+ # doc.root.elements['c'] = Element.new('d') #-> <a><d/></a>
+ def []=( index, element )
+ previous = self[index]
+ if previous.nil?
+ @element.add element
+ else
+ previous.replace_with element
+ end
+ return previous
+ end
+
+ # Returns +true+ if there are no +Element+ children, +false+ otherwise
+ def empty?
+ @element.find{ |child| child.kind_of? Element}.nil?
+ end
+
+ # Returns the index of the supplied child (starting at 1), or -1 if
+ # the element is not a child
+ # element:: an +Element+ child
+ def index element
+ rv = 0
+ found = @element.find do |child|
+ child.kind_of? Element and
+ (rv += 1) and
+ child == element
+ end
+ return rv if found == element
+ return -1
+ end
+
+ # Deletes a child Element
+ # element::
+ # Either an Element, which is removed directly; an
+ # xpath, where the first matching child is removed; or an Integer,
+ # where the n'th Element is removed.
+ # Returns:: the removed child
+ # doc = Document.new '<a><b/><c/><c id="1"/></a>'
+ # b = doc.root.elements[1]
+ # doc.root.elements.delete b #-> <a><c/><c id="1"/></a>
+ # doc.elements.delete("a/c[@id='1']") #-> <a><c/></a>
+ # doc.root.elements.delete 1 #-> <a/>
+ def delete element
+   # An Element is removed from the parent directly.
+   return @element.delete( element ) if element.kind_of? Element
+
+   # Integer index or XPath: look the child up first, then detach it.
+   # Yields nil when nothing matched.
+   match = self[element]
+   match.remove if match
+ end
+
+ # Removes multiple elements. Filters for Element children, regardless of
+ # XPath matching.
+ # xpath:: all elements matching this String path are removed.
+ # Returns:: an Array of Elements that have been removed
+ # doc = Document.new '<a><c/><c/><c/><c/></a>'
+ # deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>]
+ def delete_all( xpath )
+   removed = []
+   # All matches are gathered first; removal happens only after the XPath
+   # iteration has finished.
+   XPath::each( @element, xpath ) do |node|
+     removed << node if node.kind_of? Element
+   end
+   removed.each do |node|
+     @element.delete node
+     node.remove
+   end
+   removed
+ end
+
+ # Adds an element
+ # element::
+ # if supplied, is either an Element, String, or
+ # Source (see Element.initialize). If not supplied or nil, a
+ # new, default Element will be constructed
+ # Returns:: the added Element
+ # a = Element.new('a')
+ # a.elements.add(Element.new('b')) #-> <a><b/></a>
+ # a.elements.add('c') #-> <a><b/><c/></a>
+ def add element=nil
+   unless element.kind_of?(Element)
+     # nil stands for an empty name; any other non-Element argument is
+     # passed to Element.new unchanged. This collection is handed over as
+     # the second argument, together with the owning element's context.
+     return Element.new( element.nil? ? "" : element, self, @element.context )
+   end
+
+   @element << element
+   element.context = @element.context
+   element
+ end
+
+ alias :<< :add
+
+ # Iterates through all of the child Elements, optionally filtering
+ # them by a given XPath
+ # xpath::
+ # optional. If supplied, this is a String XPath, and is used to
+ # filter the children, so that only matching children are yielded. Note
+ # that XPaths are automatically filtered for Elements, so that
+ # non-Element children will not be yielded
+ # doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>'
+ # doc.root.elements.each {|e|p e} #-> Yields b, c, d, b, c, d elements
+ # doc.root.elements.each('b') {|e|p e} #-> Yields b, b elements
+ # doc.root.elements.each('child::node()') {|e|p e}
+ # #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/>
+ # XPath.each(doc.root, 'child::node()', &block)
+ # #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/>
+ def each( xpath=nil )
+   XPath::each( @element, xpath ) do |node|
+     # XPath may match non-Element nodes; those are never yielded.
+     next unless node.kind_of? Element
+     yield node
+   end
+ end
+
+ def collect( xpath=nil )
+   results = []
+   XPath::each( @element, xpath ) do |node|
+     # Only Element matches are passed to the block.
+     next unless node.kind_of?(Element)
+     results << yield(node)
+   end
+   results
+ end
+
+ def inject( xpath=nil, initial=nil )
+   unseeded = true
+   XPath::each( @element, xpath ) do |node|
+     next unless node.kind_of? Element
+     if unseeded and initial == nil
+       # No starting value was supplied: the first Element becomes the
+       # accumulator and is not passed to the block.
+       initial = node
+       unseeded = false
+     else
+       initial = yield( initial, node )
+     end
+   end
+   initial
+ end
+
+ # Returns the number of +Element+ children of the parent object.
+ # doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>'
+ # doc.root.size #-> 6, 3 element and 3 text nodes
+ # doc.root.elements.size #-> 3
+ def size
+   # Count only the children that are Elements.
+   @element.count { |child| child.kind_of? Element }
+ end
+
+ # Returns an Array of Element children. An XPath may be supplied to
+ # filter the children. Only Element children are returned, even if the
+ # supplied XPath matches non-Element children.
+ # doc = Document.new '<a>sean<b/>elliott<c/></a>'
+ # doc.root.elements.to_a #-> [ <b/>, <c/> ]
+ # doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ]
+ # XPath.match(doc.root, "child::node()") #-> [ sean, <b/>, elliott, <c/> ]
+ def to_a( xpath=nil )
+   matches = XPath.match( @element, xpath )
+   # The Element filter is applied only when an XPath was supplied; with no
+   # XPath the match result is returned as-is.
+   xpath ? matches.find_all { |node| node.kind_of? Element } : matches
+ end
+
+ private
+ # Private helper method. Removes quotes from quoted strings
+ def literalize name
+   # Only the first character is inspected; when it is a quote, both the
+   # first and the last character are dropped.
+   quoted = ["'", '"'].include?( name[0] )
+   quoted ? name[1..-2] : name
+ end
+ end
+
+ ########################################################################
+ # ATTRIBUTES #
+ ########################################################################
+
+ # A class that defines the set of Attributes of an Element and provides
+ # operations for accessing elements in that set.
+ class Attributes < Hash
+ # Storage layout (see []= and each_attribute): the underlying Hash is keyed
+ # by the attribute's name. A value is either a single Attribute or, when
+ # several attributes share that name but differ in prefix, a nested Hash
+ # mapping prefix => Attribute.
+ #
+ # Constructor
+ # element:: the Element that owns this set of attributes
+ def initialize element
+ @element = element
+ end
+
+ # Fetches an attribute value. If you want to get the Attribute itself,
+ # use get_attribute()
+ # name:: an XPath attribute name. Namespaces are relevant here.
+ # Returns::
+ # the String value of the matching attribute, or +nil+ if no
+ # matching attribute was found. This is the unnormalized value
+ # (with entities expanded).
+ #
+ # doc = Document.new "<a foo:att='1' bar:att='2' att='&lt;'/>"
+ # doc.root.attributes['att'] #-> '<'
+ # doc.root.attributes['bar:att'] #-> '2'
+ def [](name)
+ attr = get_attribute(name)
+ return attr.value unless attr.nil?
+ return nil
+ end
+
+ # Returns an Array of the Attribute nodes yielded by each_attribute
+ # (nested prefix Hashes are flattened).
+ def to_a
+ enum_for(:each_attribute).to_a
+ end
+
+ # Returns the number of attributes the owning Element contains.
+ # doc = Document.new "<a x='1' y='2' foo:x='3'/>"
+ # doc.root.attributes.length #-> 3
+ def length
+ c = 0
+ each_attribute { c+=1 }
+ c
+ end
+ alias :size :length
+
+ # Iterates over the attributes of an Element. Yields actual Attribute
+ # nodes, not String values.
+ #
+ # doc = Document.new '<a x="1" y="2"/>'
+ # doc.root.attributes.each_attribute {|attr|
+ # p attr.expanded_name+" => "+attr.value
+ # }
+ def each_attribute # :yields: attribute
+ each_value do |val|
+ if val.kind_of? Attribute
+ yield val
+ else
+ # Not an Attribute: a nested prefix => Attribute Hash (see []=).
+ val.each_value { |atr| yield atr }
+ end
+ end
+ end
+
+ # Iterates over each attribute of an Element, yielding the expanded name
+ # and value as a pair of Strings.
+ #
+ # doc = Document.new '<a x="1" y="2"/>'
+ # doc.root.attributes.each {|name, value| p name+" => "+value }
+ def each
+ each_attribute do |attr|
+ yield [attr.expanded_name, attr.value]
+ end
+ end
+
+ # Fetches an attribute
+ # name::
+ # the name by which to search for the attribute. Can be a
+ # <tt>prefix:name</tt> namespace name.
+ # Returns:: The first matching attribute, or nil if there was none. This
+ # value is an Attribute node, not the String value of the attribute.
+ # doc = Document.new '<a x:foo="1" foo="2" bar="3"/>'
+ # doc.root.attributes.get_attribute("foo").value #-> "2"
+ # doc.root.attributes.get_attribute("x:foo").value #-> "1"
+ def get_attribute( name )
+ # First try the name exactly as given.
+ attr = fetch( name, nil )
+ if attr.nil?
+ return nil if name.nil?
+ # Look for prefix
+ name =~ Namespace::NAMESPLIT
+ prefix, n = $1, $2
+ if prefix
+ # Retry with the part after the prefix.
+ attr = fetch( n, nil )
+ # check prefix
+ if attr == nil
+ # Nothing stored under that name either; fall through to the doctype
+ # lookup below.
+ elsif attr.kind_of? Attribute
+ return attr if prefix == attr.prefix
+ else
+ # Nested prefix => Attribute Hash: pick the entry for this prefix
+ # (nil when that prefix is absent).
+ attr = attr[ prefix ]
+ return attr
+ end
+ end
+ # Nothing stored: ask the doctype, if the document has one, for a value
+ # declared for this element/attribute pair.
+ element_document = @element.document
+ if element_document and element_document.doctype
+ expn = @element.expanded_name
+ expn = element_document.doctype.name if expn.size == 0
+ attr_val = element_document.doctype.attribute_of(expn, name)
+ return Attribute.new( name, attr_val ) if attr_val
+ end
+ return nil
+ end
+ # Direct hit on a nested prefix Hash: choose the attribute whose prefix
+ # equals the owning element's prefix.
+ if attr.kind_of? Hash
+ attr = attr[ @element.prefix ]
+ end
+ return attr
+ end
+
+ # Sets an attribute, overwriting any existing attribute value by the
+ # same name. Namespace is significant.
+ # name:: the name of the attribute
+ # value::
+ # (optional) If supplied, the value of the attribute. If
+ # nil, any existing matching attribute is deleted.
+ # Returns::
+ # Owning element. NOTE(review): the deletion path (nil value) uses a
+ # bare +return+ and therefore yields nil instead.
+ # doc = Document.new "<a x:foo='1' foo='3'/>"
+ # doc.root.attributes['y:foo'] = '2'
+ # doc.root.attributes['foo'] = '4'
+ # doc.root.attributes['x:foo'] = nil
+ def []=( name, value )
+ if value.nil? # Delete the named attribute
+ attr = get_attribute(name)
+ delete attr
+ return
+ end
+
+ # Plain values are normalized (against the doctype when the document has
+ # one) and wrapped in an Attribute.
+ unless value.kind_of? Attribute
+ if @element.document and @element.document.doctype
+ value = Text::normalize( value, @element.document.doctype )
+ else
+ value = Text::normalize( value, nil )
+ end
+ value = Attribute.new(name, value)
+ end
+ value.element = @element
+ old_attr = fetch(value.name, nil)
+ if old_attr.nil?
+ store(value.name, value)
+ elsif old_attr.kind_of? Hash
+ # Already a prefix => Attribute Hash: add or replace this prefix.
+ old_attr[value.prefix] = value
+ elsif old_attr.prefix != value.prefix
+ # Check for conflicting namespaces
+ # (the condition for this raise is the trailing +if+ modifier below:
+ # neither prefix is "xmlns" and both resolve to the same namespace)
+ raise ParseException.new(
+ "Namespace conflict in adding attribute \"#{value.name}\": "+
+ "Prefix \"#{old_attr.prefix}\" = "+
+ "\"#{@element.namespace(old_attr.prefix)}\" and prefix "+
+ "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"") if
+ value.prefix != "xmlns" and old_attr.prefix != "xmlns" and
+ @element.namespace( old_attr.prefix ) ==
+ @element.namespace( value.prefix )
+ # Same name, different prefix: promote the entry to a nested Hash.
+ store value.name, { old_attr.prefix => old_attr,
+ value.prefix => value }
+ else
+ # Same name and same prefix: plain overwrite.
+ store value.name, value
+ end
+ return @element
+ end
+
+ # Returns an array of Strings containing all of the prefixes declared
+ # by this set of attributes. The array does not include the default
+ # namespace declaration, if one exists.
+ # doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+
+ # "z='glorp' p:k='gru'/>")
+ # prefixes = doc.root.attributes.prefixes #-> ['x', 'y']
+ def prefixes
+ ns = []
+ each_attribute do |attribute|
+ ns << attribute.name if attribute.prefix == 'xmlns'
+ end
+ # Also include xmlns-prefixed attributes that the doctype lists for this
+ # element (doctype.attributes_of).
+ if @element.document and @element.document.doctype
+ expn = @element.expanded_name
+ expn = @element.document.doctype.name if expn.size == 0
+ @element.document.doctype.attributes_of(expn).each {
+ |attribute|
+ ns << attribute.name if attribute.prefix == 'xmlns'
+ }
+ end
+ ns
+ end
+
+ # Returns a Hash of namespace declarations, mapping attribute name to
+ # attribute value, for every attribute whose prefix is 'xmlns' or whose
+ # name is 'xmlns'. Attributes that the doctype lists for this element
+ # (doctype.attributes_of) are included as well.
+ def namespaces
+ namespaces = {}
+ each_attribute do |attribute|
+ namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
+ end
+ if @element.document and @element.document.doctype
+ expn = @element.expanded_name
+ expn = @element.document.doctype.name if expn.size == 0
+ @element.document.doctype.attributes_of(expn).each {
+ |attribute|
+ namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
+ }
+ end
+ namespaces
+ end
+
+ # Removes an attribute
+ # attribute::
+ # either a String, which is the name of the attribute to remove --
+ # namespaces are significant here -- or the attribute to remove.
+ # Returns:: the owning element
+ # doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>"
+ # doc.root.attributes.delete 'foo' #-> <a y:foo='0' x:foo='1' z:foo='4'/>"
+ # doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>"
+ # attr = doc.root.attributes.get_attribute('y:foo')
+ # doc.root.attributes.delete attr #-> <a z:foo='4'/>"
+ def delete( attribute )
+ name = nil
+ prefix = nil
+ if attribute.kind_of? Attribute
+ name = attribute.name
+ prefix = attribute.prefix
+ else
+ # A name was given: split it into prefix and name; a missing prefix is
+ # represented by the empty string.
+ attribute =~ Namespace::NAMESPLIT
+ prefix, name = $1, $2
+ prefix = '' unless prefix
+ end
+ old = fetch(name, nil)
+ if old.kind_of? Hash # the supplied attribute is one of many
+ old.delete(prefix)
+ # When a single attribute remains, collapse the nested Hash back into a
+ # plain Attribute entry.
+ if old.size == 1
+ repl = nil
+ old.each_value{|v| repl = v}
+ store name, repl
+ end
+ elsif old.nil?
+ return @element
+ else # the supplied attribute is a top-level one
+ super(name)
+ end
+ @element
+ end
+
+ # Adds an attribute, overriding any existing attribute by the
+ # same name. Namespaces are significant.
+ # attribute:: An Attribute
+ def add( attribute )
+ self[attribute.name] = attribute
+ end
+
+ alias :<< :add
+
+ # Deletes all attributes matching a name. Namespaces are significant.
+ # name::
+ # A String; all attributes whose expanded name equals this String
+ # will be removed
+ # Returns:: an Array of the Attributes that were removed
+ def delete_all( name )
+ rv = []
+ each_attribute { |attribute|
+ rv << attribute if attribute.expanded_name == name
+ }
+ rv.each{ |attr| attr.remove }
+ return rv
+ end
+
+ # The +get_attribute_ns+ method retrieves an attribute by its namespace
+ # and name. Thus it is possible to reliably identify an attribute
+ # even if an XML processor has changed the prefix.
+ #
+ # Method contributed by Henrik Martensson
+ def get_attribute_ns(namespace, name)
+ result = nil
+ each_attribute() { |attribute|
+ if name == attribute.name &&
+ namespace == attribute.namespace() &&
+ ( !namespace.empty? || !attribute.fully_expanded_name.index(':') )
+ # foo will match xmlns:foo, but only if foo isn't also an attribute
+ result = attribute if !result or !namespace.empty? or
+ !attribute.fully_expanded_name.index(':')
+ end
+ }
+ result
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/encoding.rb b/jni/ruby/lib/rexml/encoding.rb
new file mode 100644
index 0000000..1c7e79a
--- /dev/null
+++ b/jni/ruby/lib/rexml/encoding.rb
@@ -0,0 +1,50 @@
+# coding: US-ASCII
+module REXML
+  # Mixin that tracks a named encoding and converts Strings between that
+  # encoding and UTF-8.
+  module Encoding
+    # Name of the encoding currently in effect. #encoding= stores it as an
+    # upper-cased String.
+    attr_reader :encoding
+
+    # Sets the encoding.
+    # encoding::
+    #   an encoding name (String), a core ::Encoding object, or +nil+
+    #   (which selects 'UTF-8')
+    # Returns::
+    #   +false+ when the value compares equal to the encoding already
+    #   stored, +true+ otherwise
+    # Raises:: ArgumentError if the name has no conversion path to UTF-8
+    def encoding=(encoding)
+      # The leading :: matters: inside this module a bare +Encoding+
+      # resolves to REXML::Encoding itself, so core Encoding objects were
+      # never converted to their names and failed on #upcase further down.
+      encoding = encoding.name if encoding.is_a?(::Encoding)
+      if encoding.is_a?(String)
+        original_encoding = encoding
+        encoding = find_encoding(encoding)
+        unless encoding
+          raise ArgumentError, "Bad encoding name #{original_encoding}"
+        end
+      end
+      return false if defined?(@encoding) and encoding == @encoding
+      if encoding
+        @encoding = encoding.upcase
+      else
+        @encoding = 'UTF-8'
+      end
+      true
+    end
+
+    # Converts +string+ to the current encoding.
+    def encode(string)
+      string.encode(@encoding)
+    end
+
+    # Converts +string+, taken to be in the current encoding, to UTF-8.
+    def decode(string)
+      string.encode(::Encoding::UTF_8, @encoding)
+    end
+
+    private
+    # Maps +name+ to a usable encoding name.
+    # Returns:: the (possibly rewritten) name, or +nil+ if Ruby has no
+    # converter from that encoding to UTF-8
+    def find_encoding(name)
+      case name
+      when /\Ashift-jis\z/i
+        return "SHIFT_JIS"
+      when /\ACP-(\d+)\z/
+        # "CP-1252" style names are rewritten to "CP1252".
+        name = "CP#{$1}"
+      when /\AUTF-8\z/i
+        return name
+      end
+      begin
+        # Called only for its exception: it raises when no conversion path
+        # exists.
+        ::Encoding::Converter.search_convpath(name, 'UTF-8')
+      rescue ::Encoding::ConverterNotFoundError
+        return nil
+      end
+      name
+    end
+  end
+end
diff --git a/jni/ruby/lib/rexml/entity.rb b/jni/ruby/lib/rexml/entity.rb
new file mode 100644
index 0000000..3a35ec6
--- /dev/null
+++ b/jni/ruby/lib/rexml/entity.rb
@@ -0,0 +1,173 @@
+require 'rexml/child'
+require 'rexml/source'
+require 'rexml/xmltokens'
+
+module REXML
+  # Represents one <!ENTITY ...> declaration: either an internal entity
+  # carrying a literal value, or an external one identified by a SYSTEM or
+  # PUBLIC id.
+  class Entity < Child
+    include XMLTokens
+    PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
+    SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
+    PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
+    EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
+    NDATADECL = "\\s+NDATA\\s+#{NAME}"
+    PEREFERENCE = "%#{NAME};"
+    ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
+    PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
+    ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
+    PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
+    GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
+    ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
+
+    attr_reader :name, :external, :ref, :ndata, :pubid
+
+    # Creates a new entity. Two call forms are accepted:
+    #
+    # * <tt>Entity.new( name, value )</tt> builds a plain internal entity;
+    #   +reference+ marks it as a parameter ('%') entity.
+    # * <tt>Entity.new( array )</tt> takes an Array whose element 1 is the
+    #   name, followed either by a value or by 'SYSTEM' / 'PUBLIC' and the
+    #   corresponding ids. A trailing '%' element marks a parameter entity
+    #   and is popped off the Array.
+    #
+    # +WARNING+: nothing is validated here, so a non-conformant Entity can
+    # easily be constructed this way or through the accessors.
+    #
+    #   e = Entity.new( 'amp', '&' )
+    def initialize stream, value=nil, parent=nil, reference=false
+      super(parent)
+      @ndata = @pubid = @value = @external = nil
+      unless stream.kind_of? Array
+        # Simple (name, value) form.
+        @reference = reference
+        @name = stream
+        @value = value
+        return
+      end
+
+      @name = stream[1]
+      @reference = (stream[-1] == '%')
+      stream.pop if @reference
+      if stream[2] =~ /SYSTEM|PUBLIC/
+        @external = stream[2]
+        if @external == 'SYSTEM'
+          @ref = stream[3]
+          @ndata = stream[4] if stream.size == 5
+        else
+          @pubid = stream[3]
+          @ref = stream[4]
+        end
+      else
+        @value = stream[2]
+      end
+    end
+
+    # Returns true if +string+ starts with an entity declaration, false
+    # otherwise.
+    def Entity::matches? string
+      position = (ENTITYDECL =~ string)
+      position == 0
+    end
+
+    # Returns the unnormalized value of this entity: value() passed through
+    # Text::unnormalize, so that &ent; references are replaced in addition
+    # to the %ent; references that value() handles. When the entity belongs
+    # to a document, the expansion is recorded on that document first.
+    # Returns nil when the entity has no value.
+    def unnormalized
+      document.record_entity_expansion unless document.nil?
+      raw = value
+      return nil if raw.nil?
+      @unnormalized = Text::unnormalize( raw, parent )
+    end
+
+    #once :unnormalized
+
+    # Returns the stored value untouched, with every %ent; and &ent;
+    # reference still in place.
+    def normalized
+      @value
+    end
+
+    # Writes this entity as an <!ENTITY ...> declaration (which is only
+    # well formed if the Entity object itself is consistent).
+    #
+    # out::
+    #   An object implementing <tt><<</tt>, to which the declaration is
+    #   appended
+    # indent::
+    #   *DEPRECATED* and ignored
+    def write out, indent=-1
+      # A literal containing a double quote is wrapped in single quotes;
+      # everything else is wrapped in double quotes.
+      quote_for = lambda { |literal| literal.include?('"') ? "'" : '"' }
+      out << '<!ENTITY '
+      out << '% ' if @reference
+      out << @name
+      out << ' '
+      if @external
+        out << @external << ' '
+        if @pubid
+          q = quote_for.call( @pubid )
+          out << q << @pubid << q << ' '
+        end
+        q = quote_for.call( @ref )
+        out << q << @ref << q
+        out << ' NDATA ' << @ndata if @ndata
+      else
+        q = quote_for.call( @value )
+        out << q << @value << q
+      end
+      out << '>'
+    end
+
+    # Returns the declaration as a String. See write().
+    def to_s
+      ''.tap { |buffer| write buffer }
+    end
+
+    PEREFERENCE_RE = /#{PEREFERENCE}/um
+    # Returns the value of this entity, or nil if it has none. Only internal
+    # entities are processed. Parameter references (%blah;) inside the value
+    # are replaced with the values the parent reports for them. For
+    # example, if the doctype contains:
+    #   <!ENTITY % foo "bar">
+    #   <!ENTITY yada "nanoo %foo; nanoo">
+    # then:
+    #   doctype.entity('yada').value #-> "nanoo bar nanoo"
+    # Raises a RuntimeError once the combined size of the substituted values
+    # exceeds Security.entity_expansion_text_limit.
+    def value
+      return nil unless @value
+
+      references = @value.scan(PEREFERENCE_RE)
+      expanded = @value.clone
+      # Without a parent there is nothing to resolve references against.
+      return expanded unless @parent
+
+      total = 0
+      references.each do |reference|
+        replacement = @parent.entity( reference[0] )
+        if total + replacement.bytesize > Security.entity_expansion_text_limit
+          raise "entity expansion has grown too large"
+        end
+        total += replacement.bytesize
+        expanded.gsub!( /%#{reference.join};/um, replacement )
+      end
+      expanded
+    end
+  end
+
+  # The entities predefined by the XML specification: +gt+, +lt+, +amp+,
+  # +quot+ and +apos+.
+  # CAUTION: these entities are created without a parent, so they do not
+  # belong to any document.
+  module EntityConst
+    # +>+
+    GT = Entity.new( 'gt', '>' )
+    # +<+
+    LT = Entity.new( 'lt', '<' )
+    # +&+
+    AMP = Entity.new( 'amp', '&' )
+    # +"+
+    QUOT = Entity.new( 'quot', '"' )
+    # +'+
+    APOS = Entity.new( 'apos', "'" )
+  end
+end
diff --git a/jni/ruby/lib/rexml/formatters/default.rb b/jni/ruby/lib/rexml/formatters/default.rb
new file mode 100644
index 0000000..574c821
--- /dev/null
+++ b/jni/ruby/lib/rexml/formatters/default.rb
@@ -0,0 +1,111 @@
+module REXML
+  module Formatters
+    # Writes nodes back out as XML without adding any formatting of its own.
+    class Default
+      # Prints out the XML document with no formatting -- except if ie_hack
+      # is set.
+      #
+      # ie_hack::
+      #   If set to true, then inserts whitespace before the close of an
+      #   empty tag, so that IE's bad XML parser doesn't choke.
+      def initialize( ie_hack=false )
+        @ie_hack = ie_hack
+      end
+
+      # Writes the node to some output, dispatching on the node's class.
+      #
+      # node::
+      #   The node to write
+      # output::
+      #   A class implementing <TT>&lt;&lt;</TT>. Pass in an Output object
+      #   to change the output encoding.
+      #
+      # Raises an Exception for a node of any class not handled below.
+      def write( node, output )
+        case node
+        when Document
+          # A document declared in an encoding other than UTF-8 is written
+          # through an Output wrapper, unless the caller already passed one.
+          declared = node.xml_decl.encoding
+          if declared != 'UTF-8' && !output.kind_of?(Output)
+            output = Output.new( output, declared )
+          end
+          write_document( node, output )
+        when Element
+          write_element( node, output )
+        when Declaration, ElementDecl, NotationDecl, ExternalEntity, Entity,
+             Attribute, AttlistDecl
+          node.write( output,-1 )
+        when Instruction
+          write_instruction( node, output )
+        when DocType, XMLDecl
+          node.write( output )
+        when Comment
+          write_comment( node, output )
+        when CData
+          write_cdata( node, output )
+        when Text
+          write_text( node, output )
+        else
+          raise Exception, "XML FORMATTING ERROR"
+        end
+      end
+
+      protected
+      # Writes every child of the document in order.
+      def write_document( node, output )
+        node.children.each do |child|
+          write( child, output )
+        end
+      end
+
+      # Writes the start tag, the attributes sorted by name, the children,
+      # and the end tag. An element without children is written as an
+      # empty-element tag.
+      def write_element( node, output )
+        output << "<#{node.expanded_name}"
+
+        unless node.attributes.empty?
+          attrs = node.attributes.to_a.map { |a| Hash === a ? a.values : a }
+          attrs.flatten.sort_by { |attr| attr.name }.each do |attr|
+            output << " "
+            attr.write( output )
+          end
+        end
+
+        if node.children.empty?
+          output << " " if @ie_hack
+          output << "/"
+        else
+          output << ">"
+          node.children.each do |child|
+            write( child, output )
+          end
+          output << "</#{node.expanded_name}"
+        end
+        output << ">"
+      end
+
+      # Writes the text node's string form unchanged.
+      def write_text( node, output )
+        output << node.to_s()
+      end
+
+      # Writes the comment body between the comment delimiters.
+      def write_comment( node, output )
+        output << Comment::START
+        output << node.to_s
+        output << Comment::STOP
+      end
+
+      # Writes the CDATA content between the CDATA delimiters.
+      def write_cdata( node, output )
+        output << CData::START
+        output << node.to_s
+        output << CData::STOP
+      end
+
+      # Writes a processing instruction: target, one space, content. The
+      # first backslash, if any, is removed from each delimiter constant
+      # before it is written.
+      def write_instruction( node, output )
+        opening = Instruction::START.sub(/\\/u, '')
+        closing = Instruction::STOP.sub(/\\/u, '')
+        output << opening
+        output << node.target
+        output << ' '
+        output << node.content
+        output << closing
+      end
+    end
+  end
+end
diff --git a/jni/ruby/lib/rexml/formatters/pretty.rb b/jni/ruby/lib/rexml/formatters/pretty.rb
new file mode 100644
index 0000000..e5ba561
--- /dev/null
+++ b/jni/ruby/lib/rexml/formatters/pretty.rb
@@ -0,0 +1,141 @@
+require 'rexml/formatters/default'
+
+module REXML
+  module Formatters
+    # Pretty-prints an XML document. Whitespace inside text nodes is
+    # collapsed in the output, and line breaks and indentation are inserted.
+    #
+    # TODO: Add an option to print attributes on new lines
+    class Pretty < Default
+
+      # If compact is set to true, then the formatter will attempt to use as
+      # little space as possible
+      attr_accessor :compact
+      # The width of a page. Used for formatting text
+      attr_accessor :width
+
+      # Create a new pretty printer. The page width starts at 80 and
+      # compact mode starts switched off.
+      #
+      # indentation::
+      #   An integer greater than 0. The indentation of each level will be
+      #   this number of spaces. If this is < 1, the behavior of this object
+      #   is undefined. Defaults to 2.
+      # ie_hack::
+      #   If true, the printer will insert whitespace before closing empty
+      #   tags, thereby allowing Internet Explorer's XML parser to
+      #   function. Defaults to false.
+      def initialize( indentation=2, ie_hack=false )
+        @indentation = indentation
+        @level = 0
+        @ie_hack = ie_hack
+        @width = 80
+        @compact = false
+      end
+
+      protected
+      # Writes an element at the current indentation level, with each
+      # non-blank child on its own, further indented, line.
+      def write_element(node, output)
+        output << ' '*@level
+        output << "<#{node.expanded_name}"
+
+        node.attributes.each_attribute do |attr|
+          output << " "
+          attr.write( output )
+        end unless node.attributes.empty?
+
+        if node.children.empty?
+          if @ie_hack
+            output << " "
+          end
+          output << "/"
+        else
+          output << ">"
+          # If compact and all children are text, and if the formatted output
+          # is less than the specified width, then try to print everything on
+          # one line
+          skip = false
+          if compact
+            if node.children.all? { |c| c.kind_of?(Text) }
+              string = ""
+              # Render the text at level 0 so that it carries no indentation.
+              old_level = @level
+              @level = 0
+              node.children.each { |child| write( child, string ) }
+              @level = old_level
+              if string.length < @width
+                output << string
+                skip = true
+              end
+            end
+          end
+          unless skip
+            output << "\n"
+            @level += @indentation
+            node.children.each { |child|
+              # Whitespace-only text nodes are dropped.
+              next if child.kind_of?(Text) and child.to_s.strip.length == 0
+              write( child, output )
+              output << "\n"
+            }
+            @level -= @indentation
+            output << ' '*@level
+          end
+          output << "</#{node.expanded_name}"
+        end
+        output << ">"
+      end
+
+      # Writes a text node with every whitespace character turned into a
+      # space, runs of spaces collapsed, and the result wrapped to the page
+      # width and indented.
+      def write_text( node, output )
+        # Non-destructive gsub/squeeze: the bang variants used previously
+        # modified the very String returned by node.to_s, which belongs to
+        # the node and may be the string it stores.
+        s = node.to_s().gsub(/\s/,' ').squeeze(" ")
+        s = wrap(s, @width - @level)
+        s = indent_text(s, @level, " ", true)
+        output << (' '*@level + s)
+      end
+
+      # Writes a comment, indented to the current level.
+      def write_comment( node, output)
+        output << ' ' * @level
+        super
+      end
+
+      # Writes a CDATA section, indented to the current level.
+      def write_cdata( node, output)
+        output << ' ' * @level
+        super
+      end
+
+      def write_document( node, output )
+        # Ok, this is a bit odd. All XML documents have an XML declaration,
+        # but it may not write itself if the user didn't specifically add it,
+        # either through the API or in the input document. If it doesn't write
+        # itself, then we don't need a carriage return... which makes this
+        # logic more complex.
+        node.children.each { |child|
+          next if child == node.children[-1] and child.instance_of?(Text)
+          unless child == node.children[0] or child.instance_of?(Text) or
+            (child == node.children[1] and !node.children[0].writethis)
+            output << "\n"
+          end
+          write( child, output )
+        }
+      end
+
+      private
+      # Inserts +style+ repeated +level+ times after every newline in
+      # +string+. +indentfirstline+ is accepted but not used.
+      def indent_text(string, level=1, style="\t", indentfirstline=true)
+        return string if level < 0
+        string.gsub(/\n/, "\n#{style*level}")
+      end
+
+      # Breaks +string+ into lines: while it is longer than +width+ it is
+      # split at the last space found at or before index +width+. A stretch
+      # without such a space is left unbroken.
+      def wrap(string, width)
+        parts = []
+        while string.length > width and place = string.rindex(' ', width)
+          parts << string[0...place]
+          string = string[place+1..-1]
+        end
+        parts << string
+        parts.join("\n")
+      end
+
+    end
+  end
+end
+
diff --git a/jni/ruby/lib/rexml/formatters/transitive.rb b/jni/ruby/lib/rexml/formatters/transitive.rb
new file mode 100644
index 0000000..6cc690d
--- /dev/null
+++ b/jni/ruby/lib/rexml/formatters/transitive.rb
@@ -0,0 +1,57 @@
+require 'rexml/formatters/pretty'
+
+module REXML
+  module Formatters
+    # The Transitive formatter writes an XML document that parses to an
+    # identical document as the source document. This means that no extra
+    # whitespace nodes are inserted, and whitespace within text nodes is
+    # preserved. Within these constraints, the document is pretty-printed,
+    # with whitespace inserted into the metadata to introduce formatting.
+    #
+    # Note that this is only useful if the original XML is not already
+    # formatted. Since this formatter does not alter whitespace nodes, the
+    # results of formatting already formatted XML will be odd.
+    class Transitive < Default
+      # indentation:: number of spaces added per nesting level (default 2)
+      # ie_hack::
+      #   if true, a space is written before the "/" of an empty tag
+      def initialize( indentation=2, ie_hack=false )
+        @indentation = indentation
+        @level = 0
+        @ie_hack = ie_hack
+      end
+
+      protected
+      # Writes an element. The line break and indentation go inside the
+      # tags, just before the closing ">" or "/>", so no whitespace is added
+      # to the element's content.
+      def write_element( node, output )
+        output << "<#{node.expanded_name}"
+
+        unless node.attributes.empty?
+          node.attributes.each_attribute do |attr|
+            output << " "
+            attr.write( output )
+          end
+        end
+
+        output << "\n"
+        output << ' '*@level
+        if node.children.empty?
+          output << " " if @ie_hack
+          output << "/"
+        else
+          output << ">"
+          @level += @indentation
+          node.children.each do |child|
+            write( child, output )
+          end
+          @level -= @indentation
+          output << "</#{node.expanded_name}"
+          output << "\n"
+          output << ' '*@level
+        end
+        output << ">"
+      end
+
+      # Text is written exactly as the node renders it.
+      def write_text( node, output )
+        output << node.to_s()
+      end
+    end
+  end
+end
diff --git a/jni/ruby/lib/rexml/functions.rb b/jni/ruby/lib/rexml/functions.rb
new file mode 100644
index 0000000..2010be1
--- /dev/null
+++ b/jni/ruby/lib/rexml/functions.rb
@@ -0,0 +1,394 @@
+module REXML
+ # If you add a method, keep in mind two things:
+ # (1) the first argument will always be a list of nodes from which to
+ # filter. In the case of context methods (such as position), the function
+ # should return an array with a value for each child in the array.
+ # (2) all method calls from XML will have "-" replaced with "_".
+ # Therefore, in XML, "local-name()" is identical (and actually becomes)
+ # "local_name()"
+ module Functions
+ @@context = nil
+ @@namespace_context = {}
+ @@variables = {}
+
+ def Functions::namespace_context=(x) ; @@namespace_context=x ; end
+ def Functions::variables=(x) ; @@variables=x ; end
+ def Functions::namespace_context ; @@namespace_context ; end
+ def Functions::variables ; @@variables ; end
+
+ def Functions::context=(value); @@context = value; end
+
+ def Functions::text( )
+ if @@context[:node].node_type == :element
+ return @@context[:node].find_all{|n| n.node_type == :text}.collect{|n| n.value}
+ elsif @@context[:node].node_type == :text
+ return @@context[:node].value
+ else
+ return false
+ end
+ end
+
+ # XPath last(): returns the value stored under :size in the current
+ # context, not a node.
+ def Functions::last( )
+ @@context[:size]
+ end
+
+ # XPath position(): returns the value stored under :index in the current
+ # context.
+ def Functions::position( )
+ @@context[:index]
+ end
+
+ # Returns the size of the given list of nodes.
+ # node_set:: any object responding to #size
+ def Functions::count( node_set )
+ node_set.size
+ end
+
+ # Since REXML is non-validating, this method is not implemented as it
+ # requires a DTD
+ # Accepts one argument and always returns nil (empty body).
+ def Functions::id( object )
+ end
+
+ # UNTESTED
+ def Functions::local_name( node_set=nil )
+ get_namespace( node_set ) do |node|
+ return node.local_name
+ end
+ end
+
+ def Functions::namespace_uri( node_set=nil )
+ get_namespace( node_set ) {|node| node.namespace}
+ end
+
+ def Functions::name( node_set=nil )
+ get_namespace( node_set ) do |node|
+ node.expanded_name
+ end
+ end
+
+ # Helper method.
+ def Functions::get_namespace( node_set = nil )
+ if node_set == nil
+ yield @@context[:node] if defined? @@context[:node].namespace
+ else
+ if node_set.respond_to? :each
+ node_set.each { |node| yield node if defined? node.namespace }
+ elsif node_set.respond_to? :namespace
+ yield node_set
+ end
+ end
+ end
+
+ # A node-set is converted to a string by returning the string-value of the
+ # node in the node-set that is first in document order. If the node-set is
+ # empty, an empty string is returned.
+ #
+ # A number is converted to a string as follows
+ #
+ # NaN is converted to the string NaN
+ #
+ # positive zero is converted to the string 0
+ #
+ # negative zero is converted to the string 0
+ #
+ # positive infinity is converted to the string Infinity
+ #
+ # negative infinity is converted to the string -Infinity
+ #
+ # if the number is an integer, the number is represented in decimal form
+ # as a Number with no decimal point and no leading zeros, preceded by a
+ # minus sign (-) if the number is negative
+ #
+ # otherwise, the number is represented in decimal form as a Number
+ # including a decimal point with at least one digit before the decimal
+ # point and at least one digit after the decimal point, preceded by a
+ # minus sign (-) if the number is negative; there must be no leading zeros
+ # before the decimal point apart possibly from the one required digit
+ # immediately before the decimal point; beyond the one required digit
+ # after the decimal point there must be as many, but only as many, more
+ # digits as are needed to uniquely distinguish the number from all other
+ # IEEE 754 numeric values.
+ #
+ # The boolean false value is converted to the string false. The boolean
+ # true value is converted to the string true.
+ #
+ # An object of a type other than the four basic types is converted to a
+ # string in a way that is dependent on that type.
+ def Functions::string( object=nil )
+ #object = @context unless object
+ if object.instance_of? Array
+ string( object[0] )
+ elsif defined? object.node_type
+ if object.node_type == :attribute
+ object.value
+ elsif object.node_type == :element || object.node_type == :document
+ string_value(object)
+ else
+ object.to_s
+ end
+ elsif object.nil?
+ return ""
+ else
+ object.to_s
+ end
+ end
+
+ # A node-set is converted to a string by
+ # returning the concatenation of the string-value
+ # of each of the children of the node in the
+ # node-set that is first in document order.
+ # If the node-set is empty, an empty string is returned.
+ def Functions::string_value( o )
+ rv = ""
+ o.children.each { |e|
+ if e.node_type == :text
+ rv << e.to_s
+ elsif e.node_type == :element
+ rv << string_value( e )
+ end
+ }
+ rv
+ end
+
+ # UNTESTED
+ # Joins all arguments with Array#join; the arguments are not passed
+ # through Functions::string first.
+ def Functions::concat( *objects )
+ objects.join
+ end
+
+ # Fixed by Mike Stok
+ def Functions::starts_with( string, test )
+ string(string).index(string(test)) == 0
+ end
+
+ # Fixed by Mike Stok
+ def Functions::contains( string, test )
+ string(string).include?(string(test))
+ end
+
+ # Kouhei fixed this
+ def Functions::substring_before( string, test )
+ ruby_string = string(string)
+ ruby_index = ruby_string.index(string(test))
+ if ruby_index.nil?
+ ""
+ else
+ ruby_string[ 0...ruby_index ]
+ end
+ end
+
+ # Kouhei fixed this too
+ def Functions::substring_after( string, test )
+ ruby_string = string(string)
+ return $1 if ruby_string =~ /#{test}(.*)/
+ ""
+ end
+
+ # Take equal portions of Mike Stok and Sean Russell; mix
+ # vigorously, and pour into a tall, chilled glass. Serves 10,000.
+ def Functions::substring( string, start, length=nil )
+ ruby_string = string(string)
+ ruby_length = if length.nil?
+ ruby_string.length.to_f
+ else
+ number(length)
+ end
+ ruby_start = number(start)
+
+ # Handle the special cases
+ return '' if (
+ ruby_length.nan? or
+ ruby_start.nan? or
+ ruby_start.infinite?
+ )
+
+ infinite_length = ruby_length.infinite? == 1
+ ruby_length = ruby_string.length if infinite_length
+
+ # Now, get the bounds. The XPath bounds are 1..length; the ruby bounds
+ # are 0..length. Therefore, we have to offset the bounds by one.
+ ruby_start = ruby_start.round - 1
+ ruby_length = ruby_length.round
+
+ if ruby_start < 0
+ ruby_length += ruby_start unless infinite_length
+ ruby_start = 0
+ end
+ return '' if ruby_length <= 0
+ ruby_string[ruby_start,ruby_length]
+ end
+
+ # UNTESTED
+ # Returns the length of the string form (Functions::string) of +string+.
+ def Functions::string_length( string )
+ string(string).length
+ end
+
+ # UNTESTED
+ def Functions::normalize_space( string=nil )
+ string = string(@@context[:node]) if string.nil?
+ if string.kind_of? Array
+ string.collect{|x| string.to_s.strip.gsub(/\s+/um, ' ') if string}
+ else
+ string.to_s.strip.gsub(/\s+/um, ' ')
+ end
+ end
+
+ # This is entirely Mike Stok's beast
+ def Functions::translate( string, tr1, tr2 )
+ from = string(tr1)
+ to = string(tr2)
+
+ # the map is our translation table.
+ #
+ # if a character occurs more than once in the
+ # from string then we ignore the second &
+ # subsequent mappings
+ #
+ # if a character maps to nil then we delete it
+ # in the output. This happens if the from
+ # string is longer than the to string
+ #
+ # there's nothing about - or ^ being special in
+ # http://www.w3.org/TR/xpath#function-translate
+ # so we don't build ranges or negated classes
+
+ map = Hash.new
+ 0.upto(from.length - 1) { |pos|
+ from_char = from[pos]
+ unless map.has_key? from_char
+ map[from_char] =
+ if pos < to.length
+ to[pos]
+ else
+ nil
+ end
+ end
+ }
+
+ if ''.respond_to? :chars
+ string(string).chars.collect { |c|
+ if map.has_key? c then map[c] else c end
+ }.compact.join
+ else
+ string(string).unpack('U*').collect { |c|
+ if map.has_key? c then map[c] else c end
+ }.compact.pack('U*')
+ end
+ end
+
+ # UNTESTED
+ def Functions::boolean( object=nil )
+ if object.kind_of? String
+ if object =~ /\d+/u
+ return object.to_f != 0
+ else
+ return object.size > 0
+ end
+ elsif object.kind_of? Array
+ object = object.find{|x| x and true}
+ end
+ return object ? true : false
+ end
+
+ # UNTESTED
+ def Functions::not( object )
+ not boolean( object )
+ end
+
+ # UNTESTED
+ # XPath true(): always returns true.
+ def Functions::true( )
+ true
+ end
+
+ # UNTESTED
+ # XPath false(): always returns false.
+ def Functions::false( )
+ false
+ end
+
+ # UNTESTED
+ def Functions::lang( language )
+ lang = false
+ node = @@context[:node]
+ attr = nil
+ until node.nil?
+ if node.node_type == :element
+ attr = node.attributes["xml:lang"]
+ unless attr.nil?
+ lang = compare_language(string(language), attr)
+ break
+ else
+ end
+ end
+ node = node.parent
+ end
+ lang
+ end
+
+ def Functions::compare_language lang1, lang2
+ lang2.downcase.index(lang1.downcase) == 0
+ end
+
+ # a string that consists of optional whitespace followed by an optional
+ # minus sign followed by a Number followed by whitespace is converted to
+ # the IEEE 754 number that is nearest (according to the IEEE 754
+ # round-to-nearest rule) to the mathematical value represented by the
+ # string; any other string is converted to NaN
+ #
+ # boolean true is converted to 1; boolean false is converted to 0
+ #
+ # a node-set is first converted to a string as if by a call to the string
+ # function and then converted in the same way as a string argument
+ #
+ # an object of a type other than the four basic types is converted to a
+ # number in a way that is dependent on that type
+ def Functions::number( object=nil )
+ object = @@context[:node] unless object
+ case object
+ when true
+ Float(1)
+ when false
+ Float(0)
+ when Array
+ number(string( object ))
+ when Numeric
+ object.to_f
+ else
+ str = string( object )
+ # If XPath ever gets scientific notation...
+ #if str =~ /^\s*-?(\d*\.?\d+|\d+\.)([Ee]\d*)?\s*$/
+ if str =~ /^\s*-?(\d*\.?\d+|\d+\.)\s*$/
+ str.to_f
+ else
+ (0.0 / 0.0)
+ end
+ end
+ end
+
+ def Functions::sum( nodes )
+ nodes = [nodes] unless nodes.kind_of? Array
+ nodes.inject(0) { |r,n| r + number(string(n)) }
+ end
+
+ def Functions::floor( number )
+ number(number).floor
+ end
+
+ def Functions::ceiling( number )
+ number(number).ceil
+ end
+
+ def Functions::round( number )
+ begin
+ number(number).round
+ rescue FloatDomainError
+ number(number)
+ end
+ end
+
+ # True when +node+ reports the node type :processing_instruction.
+ def Functions::processing_instruction( node )
+ node.node_type == :processing_instruction
+ end
+
+ def Functions::method_missing( id )
+ puts "METHOD MISSING #{id.id2name}"
+ XPath.match( @@context[:node], id.id2name )
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/instruction.rb b/jni/ruby/lib/rexml/instruction.rb
new file mode 100644
index 0000000..f8b734a
--- /dev/null
+++ b/jni/ruby/lib/rexml/instruction.rb
@@ -0,0 +1,70 @@
+require "rexml/child"
+require "rexml/source"
+
+module REXML
+ # Represents an XML Instruction; IE, <? ... ?>
+ # TODO: Add parent arg (3rd arg) to constructor
+ class Instruction < Child
+ START = '<\?'
+ STOP = '\?>'
+
+ # target is the "name" of the Instruction; IE, the "tag" in <?tag ...?>
+ # content is everything else.
+ attr_accessor :target, :content
+
+ # Constructs a new Instruction
+ # @param target can be one of a number of things. If String, then
+ # the target of this instruction is set to this. If an Instruction,
+ # then the Instruction is shallowly cloned (target and content are
+ # copied). If a Source, then the source is scanned and parsed for
+ # an Instruction declaration.
+ # @param content Must be either a String, or a Parent. Can only
+ # be a Parent if the target argument is a Source. Otherwise, this
+ # String is set as the content of this instruction.
+ def initialize(target, content=nil)
+ if target.kind_of? String
+ super()
+ @target = target
+ @content = content
+ elsif target.kind_of? Instruction
+ super(content)
+ @target = target.target
+ @content = target.content
+ end
+ @content.strip! if @content
+ end
+
+ def clone
+ Instruction.new self
+ end
+
+ # == DEPRECATED
+ # See the rexml/formatters package
+ #
+ def write writer, indent=-1, transitive=false, ie_hack=false
+ Kernel.warn( "#{self.class.name}.write is deprecated" )
+ indent(writer, indent)
+ writer << START.sub(/\\/u, '')
+ writer << @target
+ writer << ' '
+ writer << @content
+ writer << STOP.sub(/\\/u, '')
+ end
+
+ # @return true if other is an Instruction, and the content and target
+ # of the other matches the target and content of this object.
+ def ==( other )
+ other.kind_of? Instruction and
+ other.target == @target and
+ other.content == @content
+ end
+
+ def node_type
+ :processing_instruction
+ end
+
+ def inspect
+ "<?p-i #{target} ...?>"
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/light/node.rb b/jni/ruby/lib/rexml/light/node.rb
new file mode 100644
index 0000000..b33f78f
--- /dev/null
+++ b/jni/ruby/lib/rexml/light/node.rb
@@ -0,0 +1,195 @@
+require 'rexml/xmltokens'
+
+# [ :element, parent, name, attributes, children* ]
+ # a = Node.new
+ # a << "B" # => <a>B</a>
+ # a.b # => <a>B<b/></a>
+ # a.b[1] # => <a>B<b/><b/></a>
+ # a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
+ # a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
+ # a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
+module REXML
+ module Light
+ # Represents a tagged XML element. Elements are characterized by
+ # having children, attributes, and names, and can themselves be
+ # children.
+ class Node
+ NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
+ PARENTS = [ :element, :document, :doctype ]
+ # Create a new element.
+ def initialize node=nil
+ @node = node
+ if node.kind_of? String
+ node = [ :text, node ]
+ elsif node.nil?
+ node = [ :document, nil, nil ]
+ elsif node[0] == :start_element
+ node[0] = :element
+ elsif node[0] == :start_doctype
+ node[0] = :doctype
+ elsif node[0] == :start_document
+ node[0] = :document
+ end
+ end
+
+ def size
+ if PARENTS.include? @node[0]
+ @node[-1].size
+ else
+ 0
+ end
+ end
+
+ def each
+ size.times { |x| yield( at(x+4) ) }
+ end
+
+ def name
+ at(2)
+ end
+
+ def name=( name_str, ns=nil )
+ pfx = ''
+ pfx = "#{prefix(ns)}:" if ns
+ _old_put(2, "#{pfx}#{name_str}")
+ end
+
+ def parent=( node )
+ _old_put(1,node)
+ end
+
+ def local_name
+ namesplit
+ @name
+ end
+
+ def local_name=( name_str )
+ _old_put( 1, "#@prefix:#{name_str}" )
+ end
+
+ def prefix( namespace=nil )
+ prefix_of( self, namespace )
+ end
+
+ def namespace( prefix=prefix() )
+ namespace_of( self, prefix )
+ end
+
+ def namespace=( namespace )
+ @prefix = prefix( namespace )
+ pfx = ''
+ pfx = "#@prefix:" if @prefix.size > 0
+ _old_put(1, "#{pfx}#@name")
+ end
+
+ def []( reference, ns=nil )
+ if reference.kind_of? String
+ pfx = ''
+ pfx = "#{prefix(ns)}:" if ns
+ at(3)["#{pfx}#{reference}"]
+ elsif reference.kind_of? Range
+ _old_get( Range.new(4+reference.begin, reference.end, reference.exclude_end?) )
+ else
+ _old_get( 4+reference )
+ end
+ end
+
+ def =~( path )
+ XPath.match( self, path )
+ end
+
+ # Doesn't handle namespaces yet
+ def []=( reference, ns, value=nil )
+ if reference.kind_of? String
+ value = ns unless value
+ at( 3 )[reference] = value
+ elsif reference.kind_of? Range
+ _old_put( Range.new(3+reference.begin, reference.end, reference.exclude_end?), ns )
+ else
+ if value
+ _old_put( 4+reference, ns, value )
+ else
+ _old_put( 4+reference, ns )
+ end
+ end
+ end
+
+ # Append a child to this element, optionally under a provided namespace.
+ # The namespace argument is ignored if the element argument is an Element
+ # object. Otherwise, the element argument is a string, the namespace (if
+ # provided) is the namespace the element is created in.
+ def << element
+ if node_type() == :text
+ at(-1) << element
+ else
+ newnode = Node.new( element )
+ newnode.parent = self
+ self.push( newnode )
+ end
+ at(-1)
+ end
+
+ def node_type
+ _old_get(0)
+ end
+
+ def text=( foo )
+ replace = at(4).kind_of?(String)? 1 : 0
+ self._old_put(4,replace, normalizefoo)
+ end
+
+ def root
+ context = self
+ context = context.at(1) while context.at(1)
+ end
+
+ def has_name?( name, namespace = '' )
+ at(3) == name and namespace() == namespace
+ end
+
+ def children
+ self
+ end
+
+ def parent
+ at(1)
+ end
+
+ def to_s
+
+ end
+
+ private
+
+ def namesplit
+ return if @name.defined?
+ at(2) =~ NAMESPLIT
+ @prefix = '' || $1
+ @name = $2
+ end
+
+ def namespace_of( node, prefix=nil )
+ if not prefix
+ name = at(2)
+ name =~ NAMESPLIT
+ prefix = $1
+ end
+ to_find = 'xmlns'
+ to_find = "xmlns:#{prefix}" if not prefix.nil?
+ ns = at(3)[ to_find ]
+ ns ? ns : namespace_of( @node[0], prefix )
+ end
+
+ def prefix_of( node, namespace=nil )
+ if not namespace
+ name = node.name
+ name =~ NAMESPLIT
+ $1
+ else
+ ns = at(3).find { |k,v| v == namespace }
+ ns ? ns : prefix_of( node.parent, namespace )
+ end
+ end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/namespace.rb b/jni/ruby/lib/rexml/namespace.rb
new file mode 100644
index 0000000..aeb339e
--- /dev/null
+++ b/jni/ruby/lib/rexml/namespace.rb
@@ -0,0 +1,47 @@
+require 'rexml/xmltokens'
+
+module REXML
+ # Adds named attributes to an object.
+ module Namespace
+ # The name of the object, valid if set
+ attr_reader :name, :expanded_name
+ # The expanded name of the object, valid if name is set
+ attr_accessor :prefix
+ include XMLTokens
+ NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
+
+ # Sets the name and the expanded name
+ def name=( name )
+ @expanded_name = name
+ name =~ NAMESPLIT
+ if $1
+ @prefix = $1
+ else
+ @prefix = ""
+ @namespace = ""
+ end
+ @name = $2
+ end
+
+ # Compares names optionally WITH namespaces
+ def has_name?( other, ns=nil )
+ if ns
+ return (namespace() == ns and name() == other)
+ elsif other.include? ":"
+ return fully_expanded_name == other
+ else
+ return name == other
+ end
+ end
+
+ alias :local_name :name
+
+ # Fully expand the name, even if the prefix wasn't specified in the
+ # source file.
+ def fully_expanded_name
+ ns = prefix
+ return "#{ns}:#@name" if ns.size > 0
+ return @name
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/node.rb b/jni/ruby/lib/rexml/node.rb
new file mode 100644
index 0000000..cab6e9f
--- /dev/null
+++ b/jni/ruby/lib/rexml/node.rb
@@ -0,0 +1,75 @@
+require "rexml/parseexception"
+require "rexml/formatters/pretty"
+require "rexml/formatters/default"
+
+module REXML
+ # Represents a node in the tree. Nodes are never encountered except as
+ # superclasses of other objects. Nodes have siblings.
+ module Node
+ # @return the next sibling (nil if unset)
+ def next_sibling_node
+ return nil if @parent.nil?
+ @parent[ @parent.index(self) + 1 ]
+ end
+
+ # @return the previous sibling (nil if unset)
+ def previous_sibling_node
+ return nil if @parent.nil?
+ ind = @parent.index(self)
+ return nil if ind == 0
+ @parent[ ind - 1 ]
+ end
+
+ # indent::
+ # *DEPRECATED* This parameter is now ignored. See the formatters in the
+ # REXML::Formatters package for changing the output style.
+ def to_s indent=nil
+ unless indent.nil?
+ Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated" )
+ f = REXML::Formatters::Pretty.new( indent )
+ f.write( self, rv = "" )
+ else
+ f = REXML::Formatters::Default.new
+ f.write( self, rv = "" )
+ end
+ return rv
+ end
+
+ def indent to, ind
+ if @parent and @parent.context and not @parent.context[:indentstyle].nil? then
+ indentstyle = @parent.context[:indentstyle]
+ else
+ indentstyle = ' '
+ end
+ to << indentstyle*ind unless ind<1
+ end
+
+ def parent?
+ false;
+ end
+
+
+ # Visit all subnodes of +self+ recursively
+ def each_recursive(&block) # :yields: node
+ self.elements.each {|node|
+ block.call(node)
+ node.each_recursive(&block)
+ }
+ end
+
+ # Find (and return) first subnode (recursively) for which the block
+ # evaluates to true. Returns +nil+ if none was found.
+ def find_first_recursive(&block) # :yields: node
+ each_recursive {|node|
+ return node if block.call(node)
+ }
+ return nil
+ end
+
+ # Returns the position that +self+ holds in its parent's array, indexed
+ # from 1.
+ def index_in_parent
+ parent.index(self)+1
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/output.rb b/jni/ruby/lib/rexml/output.rb
new file mode 100644
index 0000000..0c6cc7a
--- /dev/null
+++ b/jni/ruby/lib/rexml/output.rb
@@ -0,0 +1,29 @@
+require 'rexml/encoding'
+
+module REXML
+ class Output
+ include Encoding
+
+ attr_reader :encoding
+
+ def initialize real_IO, encd="iso-8859-1"
+ @output = real_IO
+ self.encoding = encd
+
+ @to_utf = encoding != 'UTF-8'
+
+ if encoding == "UTF-16"
+ @output << "\ufeff".encode("UTF-16BE")
+ self.encoding = "UTF-16BE"
+ end
+ end
+
+ def <<( content )
+ @output << (@to_utf ? self.encode(content) : content)
+ end
+
+ def to_s
+ "Output[#{encoding}]"
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/parent.rb b/jni/ruby/lib/rexml/parent.rb
new file mode 100644
index 0000000..2a07fca
--- /dev/null
+++ b/jni/ruby/lib/rexml/parent.rb
@@ -0,0 +1,165 @@
+require "rexml/child"
+
+module REXML
+ # A parent has children, and has methods for accessing them. The Parent
+ # class is never encountered except as the superclass for some other
+ # object.
+ class Parent < Child
+ include Enumerable
+
+ # Constructor
+ # @param parent if supplied, will be set as the parent of this object
+ def initialize parent=nil
+ super(parent)
+ @children = []
+ end
+
+ def add( object )
+ object.parent = self
+ @children << object
+ object
+ end
+
+ alias :push :add
+ alias :<< :push
+
+ def unshift( object )
+ object.parent = self
+ @children.unshift object
+ end
+
+ def delete( object )
+ found = false
+ @children.delete_if {|c| c.equal?(object) and found = true }
+ object.parent = nil if found
+ found ? object : nil
+ end
+
+ def each(&block)
+ @children.each(&block)
+ end
+
+ def delete_if( &block )
+ @children.delete_if(&block)
+ end
+
+ def delete_at( index )
+ @children.delete_at index
+ end
+
+ def each_index( &block )
+ @children.each_index(&block)
+ end
+
+ # Fetches a child at a given index
+ # @param index the Integer index of the child to fetch
+ def []( index )
+ @children[index]
+ end
+
+ alias :each_child :each
+
+
+
+ # Set an index entry. See Array.[]=
+ # @param index the index of the element to set
+ # @param opt either the object to set, or an Integer length
+ # @param child if opt is an Integer, this is the child to set
+ # @return the parent (self)
+ def []=( *args )
+ args[-1].parent = self
+ @children[*args[0..-2]] = args[-1]
+ end
+
+ # Inserts an child before another child
+ # @param child1 this is either an xpath or an Element. If an Element,
+ # child2 will be inserted before child1 in the child list of the parent.
+ # If an xpath, child2 will be inserted before the first child to match
+ # the xpath.
+ # @param child2 the child to insert
+ # @return the parent (self)
+ def insert_before( child1, child2 )
+ if child1.kind_of? String
+ child1 = XPath.first( self, child1 )
+ child1.parent.insert_before child1, child2
+ else
+ ind = index(child1)
+ child2.parent.delete(child2) if child2.parent
+ @children[ind,0] = child2
+ child2.parent = self
+ end
+ self
+ end
+
+ # Inserts an child after another child
+ # @param child1 this is either an xpath or an Element. If an Element,
+ # child2 will be inserted after child1 in the child list of the parent.
+ # If an xpath, child2 will be inserted after the first child to match
+ # the xpath.
+ # @param child2 the child to insert
+ # @return the parent (self)
+ def insert_after( child1, child2 )
+ if child1.kind_of? String
+ child1 = XPath.first( self, child1 )
+ child1.parent.insert_after child1, child2
+ else
+ ind = index(child1)+1
+ child2.parent.delete(child2) if child2.parent
+ @children[ind,0] = child2
+ child2.parent = self
+ end
+ self
+ end
+
+ def to_a
+ @children.dup
+ end
+
+ # Fetches the index of a given child
+ # @param child the child to get the index of
+ # @return the index of the child, or nil if the object is not a child
+ # of this parent.
+ def index( child )
+ count = -1
+ @children.find { |i| count += 1 ; i.hash == child.hash }
+ count
+ end
+
+ # @return the number of children of this parent
+ def size
+ @children.size
+ end
+
+ alias :length :size
+
+ # Replaces one child with another, making sure the nodelist is correct
+ # @param to_replace the child to replace (must be a Child)
+ # @param replacement the child to insert into the nodelist (must be a
+ # Child)
+ def replace_child( to_replace, replacement )
+ @children.map! {|c| c.equal?( to_replace ) ? replacement : c }
+ to_replace.parent = nil
+ replacement.parent = self
+ end
+
+ # Deeply clones this object. This creates a complete duplicate of this
+ # Parent, including all descendants.
+ def deep_clone
+ cl = clone()
+ each do |child|
+ if child.kind_of? Parent
+ cl << child.deep_clone
+ else
+ cl << child.clone
+ end
+ end
+ cl
+ end
+
+ alias :children :to_a
+
+ def parent?
+ true
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/parseexception.rb b/jni/ruby/lib/rexml/parseexception.rb
new file mode 100644
index 0000000..0c4d55a
--- /dev/null
+++ b/jni/ruby/lib/rexml/parseexception.rb
@@ -0,0 +1,51 @@
+module REXML
+ class ParseException < RuntimeError
+ attr_accessor :source, :parser, :continued_exception
+
+ def initialize( message, source=nil, parser=nil, exception=nil )
+ super(message)
+ @source = source
+ @parser = parser
+ @continued_exception = exception
+ end
+
+ def to_s
+ # Quote the original exception, if there was one
+ if @continued_exception
+ err = @continued_exception.inspect
+ err << "\n"
+ err << @continued_exception.backtrace.join("\n")
+ err << "\n...\n"
+ else
+ err = ""
+ end
+
+ # Get the stack trace and error message
+ err << super
+
+ # Add contextual information
+ if @source
+ err << "\nLine: #{line}\n"
+ err << "Position: #{position}\n"
+ err << "Last 80 unconsumed characters:\n"
+ err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
+ end
+
+ err
+ end
+
+ def position
+ @source.current_line[0] if @source and defined? @source.current_line and
+ @source.current_line
+ end
+
+ def line
+ @source.current_line[2] if @source and defined? @source.current_line and
+ @source.current_line
+ end
+
+ def context
+ @source.current_line
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/parsers/baseparser.rb b/jni/ruby/lib/rexml/parsers/baseparser.rb
new file mode 100644
index 0000000..6a08b86
--- /dev/null
+++ b/jni/ruby/lib/rexml/parsers/baseparser.rb
@@ -0,0 +1,532 @@
+require 'rexml/parseexception'
+require 'rexml/undefinednamespaceexception'
+require 'rexml/source'
+require 'set'
+
+module REXML
+ module Parsers
+ # = Using the Pull Parser
+ # <em>This API is experimental, and subject to change.</em>
+ # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+ # while parser.has_next?
+ # res = parser.next
+ # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
+ # end
+ # See the PullEvent class for information on the content of the results.
+ # The data is identical to the arguments passed for the various events to
+ # the StreamListener API.
+ #
+ # Notice that:
+ # parser = PullParser.new( "<a>BAD DOCUMENT" )
+ # while parser.has_next?
+ # res = parser.next
+ # raise res[1] if res.error?
+ # end
+ #
+ # Nat Price gave me some good ideas for the API.
+ class BaseParser
+ LETTER = '[:alpha:]'
+ DIGIT = '[:digit:]'
+
+ COMBININGCHAR = '' # TODO
+ EXTENDER = '' # TODO
+
+ NCNAME_STR= "[#{LETTER}_:][-[:alnum:]._:#{COMBININGCHAR}#{EXTENDER}]*"
+ NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
+ UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
+
+ NAMECHAR = '[\-\w\.:]'
+ NAME = "([\\w:]#{NAMECHAR}*)"
+ NMTOKEN = "(?:#{NAMECHAR})+"
+ NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
+ REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
+ REFERENCE_RE = /#{REFERENCE}/
+
+ DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
+ DOCTYPE_END = /\A\s*\]\s*>/um
+ DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
+ ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
+ COMMENT_START = /\A<!--/u
+ COMMENT_PATTERN = /<!--(.*?)-->/um
+ CDATA_START = /\A<!\[CDATA\[/u
+ CDATA_END = /\A\s*\]\s*>/um
+ CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
+ XMLDECL_START = /\A<\?xml\s/u;
+ XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
+ INSTRUCTION_START = /\A<\?/u
+ INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
+ TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
+ CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
+
+ VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
+ ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
+ STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
+
+ ENTITY_START = /\A\s*<!ENTITY/
+ IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
+ ELEMENTDECL_START = /\A\s*<!ELEMENT/um
+ ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
+ SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
+ ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
+ NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
+ ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
+ ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
+ ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
+ DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
+ ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
+ ATTDEF_RE = /#{ATTDEF}/
+ ATTLISTDECL_START = /\A\s*<!ATTLIST/um
+ ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
+ NOTATIONDECL_START = /\A\s*<!NOTATION/um
+ PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
+ SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
+
+ TEXT_PATTERN = /\A([^<]*)/um
+
+ # Entity constants
+ PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
+ SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
+ PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
+ EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
+ NDATADECL = "\\s+NDATA\\s+#{NAME}"
+ PEREFERENCE = "%#{NAME};"
+ ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
+ PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
+ ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
+ PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
+ GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
+ ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
+
+ EREFERENCE = /&(?!#{NAME};)/
+
+ DEFAULT_ENTITIES = {
+ 'gt' => [/&gt;/, '&gt;', '>', />/],
+ 'lt' => [/&lt;/, '&lt;', '<', /</],
+ 'quot' => [/&quot;/, '&quot;', '"', /"/],
+ "apos" => [/&apos;/, "&apos;", "'", /'/]
+ }
+
+
+ ######################################################################
+ # These are patterns to identify common markup errors, to make the
+ # error messages more informative.
+ ######################################################################
+ MISSING_ATTRIBUTE_QUOTES = /^<#{NAME_STR}\s+#{NAME_STR}\s*=\s*[^"']/um
+
+ def initialize( source )
+   # The listener list does not depend on the stream state, so set it up
+   # first and then let stream= reset everything tied to the source.
+   @listeners = []
+   self.stream = source
+ end
+
+ def add_listener( listener )
+   # Appends to the list of registered listeners.
+   @listeners.push( listener )
+ end
+
+ attr_reader :source
+
+ def stream=( source )
+   # Wrap the input and reset every piece of per-document parsing state.
+   @source = SourceFactory.create_from( source )
+   @closed = @document_status = nil
+   @tags, @stack, @entities = [], [], []
+   @nsstack = []
+ end
+
+ def position
+   # Sources without a +position+ method are reported as 0.
+   # FIXME
+   @source.respond_to?( :position ) ? @source.position : 0
+ end
+
+ # True once the source is exhausted and no pushed-back events remain.
+ def empty?
+   @source.empty? && @stack.empty?
+ end
+
+ # True while the source or the push-back stack still holds something.
+ # The logical opposite of the emptiness check.
+ def has_next?
+   !@source.empty? || !@stack.empty?
+ end
+
+ # Returns +token+ to the front of the event queue so that it is the next
+ # event handed out. There is no fixed limit on how many can be pushed back.
+ def unshift( token )
+   @stack.insert( 0, token )
+ end
+
+ # Looks ahead at the event +depth+ places from the front of the queue
+ # (0 is the next event) without consuming it. A +depth+ of -1 parses the
+ # rest of the input and returns the final event. Any events that have to be
+ # parsed to reach +depth+ are kept in memory, so peeking far ahead
+ # effectively pre-parses that much of the document. Raises for a +depth+
+ # below -1.
+ def peek depth=0
+   raise %Q[Illegal argument "#{depth}"] if depth < -1
+   pulled = []
+   if depth == -1
+     pulled << pull() until empty?
+   else
+     needed = depth + 1
+     pulled << pull() while @stack.size + pulled.size < needed
+   end
+   @stack += pulled unless pulled.empty?
+   @stack[depth]
+ end
+
+ # Fetches the next event, hands it to every registered listener through
+ # +receive+, and returns it.
+ def pull
+   event = pull_event
+   @listeners.each { |listener| listener.receive( event ) }
+   event
+ end
+
+ # Produces the next raw event array; element 0 is the event symbol.
+ # Listeners are not notified here (#pull does that).
+ #
+ # Order of checks, as coded below:
+ # 1. a pending :end_element for a self-closing tag (@closed);
+ # 2. :end_document once the source and the push-back stack are both empty;
+ # 3. an event previously queued on @stack;
+ # 4. prolog handling while @document_status is nil;
+ # 5. DOCTYPE internal-subset handling while @document_status is :in_doctype;
+ # 6. ordinary content: end tags, comments, CDATA, processing instructions,
+ #    start tags and text.
+ #
+ # Raises REXML::ParseException (or UndefinedNamespaceException) on malformed
+ # input; any other exception raised in step 6 is wrapped in a
+ # ParseException.
+ def pull_event
+ # A self-closing tag was reported as :start_element on the previous call;
+ # report its matching :end_element now.
+ if @closed
+ x, @closed = @closed, nil
+ return [ :end_element, x ]
+ end
+ return [ :end_document ] if empty?
+ return @stack.shift if @stack.size > 0
+ #STDERR.puts @source.encoding
+ # Ask the source for more input when fewer than two characters are buffered.
+ @source.read if @source.buffer.size<2
+ #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
+ # --- Prolog: no element content has been seen yet ---
+ if @document_status == nil
+ #@source.consume( /^\s*/um )
+ # Inspect the leading whitespace run or <...> token to decide what follows.
+ word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
+ word = word[1] unless word.nil?
+ #STDERR.puts "WORD = #{word.inspect}"
+ case word
+ when COMMENT_START
+ return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
+ when XMLDECL_START
+ #STDERR.puts "XMLDECL"
+ results = @source.match( XMLDECL_PATTERN, true )[1]
+ version = VERSION.match( results )
+ version = version[1] unless version.nil?
+ encoding = ENCODING.match(results)
+ encoding = encoding[1] unless encoding.nil?
+ # Switch the source to the declared encoding, except for a plain "UTF-16"
+ # declaration (see need_source_encoding_update?).
+ if need_source_encoding_update?(encoding)
+ @source.encoding = encoding
+ end
+ # No declared encoding but the source is UTF-16BE/LE: report "UTF-16".
+ if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
+ encoding = "UTF-16"
+ end
+ standalone = STANDALONE.match(results)
+ standalone = standalone[1] unless standalone.nil?
+ return [ :xmldecl, version, encoding, standalone ]
+ when INSTRUCTION_START
+ return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
+ when DOCTYPE_START
+ md = @source.match( DOCTYPE_PATTERN, true )
+ # Namespace-prefix set for the DOCTYPE; ATTLIST handling below adds
+ # xmlns:* attribute names to @nsstack[0].
+ @nsstack.unshift(curr_ns=Set.new)
+ identity = md[1]
+ close = md[2]
+ # IDENTITY splits the declaration into name, keyword and quoted literals.
+ identity =~ IDENTITY
+ name = $1
+ raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
+ pub_sys = $2.nil? ? nil : $2.strip
+ long_name = $4.nil? ? nil : $4.strip
+ uri = $6.nil? ? nil : $6.strip
+ args = [ :start_doctype, name, pub_sys, long_name, uri ]
+ if close == ">"
+ # No internal subset: queue the matching :end_doctype right away.
+ @document_status = :after_doctype
+ @source.read if @source.buffer.size<2
+ md = @source.match(/^\s*/um, true)
+ @stack << [ :end_doctype ]
+ else
+ # A "[" opened an internal subset; later calls take the :in_doctype path.
+ @document_status = :in_doctype
+ end
+ return args
+ when /^\s+/
+ # Whitespace only: nothing to do here; control falls through to the
+ # content handling further down.
+ else
+ # Anything else ends the prolog.
+ @document_status = :after_doctype
+ @source.read if @source.buffer.size<2
+ md = @source.match(/\s*/um, true)
+ if @source.encoding == "UTF-8"
+ @source.buffer.force_encoding(::Encoding::UTF_8)
+ end
+ end
+ end
+ # --- Inside the DOCTYPE internal subset ---
+ if @document_status == :in_doctype
+ # NOTE(review): md is used below without a nil check, and this code sits
+ # outside the begin/rescue further down — confirm a failed match cannot
+ # happen here.
+ md = @source.match(/\s*(.*?>)/um)
+ case md[1]
+ when SYSTEMENTITY
+ match = @source.match( SYSTEMENTITY, true )[1]
+ return [ :externalentity, match ]
+
+ when ELEMENTDECL_START
+ return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
+
+ when ENTITY_START
+ # compact drops the capture groups that did not participate in the match.
+ match = @source.match( ENTITYDECL, true ).to_a.compact
+ match[0] = :entitydecl
+ ref = false
+ # A leading '%' marks a parameter entity; it is re-appended at the end.
+ if match[1] == '%'
+ ref = true
+ match.delete_at 1
+ end
+ # Now we have to sort out what kind of entity reference this is
+ if match[2] == 'SYSTEM'
+ # External reference
+ match[3] = match[3][1..-2] # PUBID
+ match.delete_at(4) if match.size > 4 # Chop out NDATA decl
+ # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
+ elsif match[2] == 'PUBLIC'
+ # External reference
+ match[3] = match[3][1..-2] # PUBID
+ match[4] = match[4][1..-2] # HREF
+ match.delete_at(5) if match.size > 5 # Chop out NDATA decl
+ # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
+ else
+ # Internal entity: strip the surrounding quotes from the value.
+ match[2] = match[2][1..-2]
+ match.pop if match.size == 4
+ # match is [ :entity, name, value ]
+ end
+ match << '%' if ref
+ return match
+ when ATTLISTDECL_START
+ md = @source.match( ATTLISTDECL_PATTERN, true )
+ raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
+ element = md[1]
+ contents = md[0]
+
+ # Collect attribute name => default value for every attribute definition
+ # that is not #IMPLIED.
+ pairs = {}
+ values = md[0].scan( ATTDEF_RE )
+ values.each do |attdef|
+ unless attdef[3] == "#IMPLIED"
+ attdef.compact!
+ val = attdef[3]
+ val = attdef[4] if val == "#FIXED "
+ pairs[attdef[0]] = val
+ # An xmlns:prefix attribute makes that prefix known for later checks.
+ if attdef[0] =~ /^xmlns:(.*)/
+ @nsstack[0] << $1
+ end
+ end
+ end
+ return [ :attlistdecl, element, pairs, contents ]
+ when NOTATIONDECL_START
+ md = nil
+ # Try the PUBLIC form first, then the SYSTEM form.
+ if @source.match( PUBLIC )
+ md = @source.match( PUBLIC, true )
+ vals = [md[1],md[2],md[4],md[6]]
+ elsif @source.match( SYSTEM )
+ md = @source.match( SYSTEM, true )
+ vals = [md[1],md[2],nil,md[4]]
+ else
+ raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
+ end
+ return [ :notationdecl, *vals ]
+ when DOCTYPE_END
+ @document_status = :after_doctype
+ @source.match( DOCTYPE_END, true )
+ return [ :end_doctype ]
+ end
+ end
+ # --- Ordinary content ---
+ begin
+ if @source.buffer[0] == ?<
+ if @source.buffer[1] == ?/
+ # End tag: leave the element's namespace scope and check that the name
+ # matches the most recently opened tag.
+ @nsstack.shift
+ last_tag = @tags.pop
+ #md = @source.match_to_consume( '>', CLOSE_MATCH)
+ md = @source.match( CLOSE_MATCH, true )
+ raise REXML::ParseException.new( "Missing end tag for "+
+ "'#{last_tag}' (got \"#{md[1]}\")",
+ @source) unless last_tag == md[1]
+ return [ :end_element, last_tag ]
+ elsif @source.buffer[1] == ?!
+ # "<!" starts either a comment or a CDATA section here.
+ md = @source.match(/\A(\s*[^>]*>)/um)
+ #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
+ raise REXML::ParseException.new("Malformed node", @source) unless md
+ if md[0][2] == ?-
+ md = @source.match( COMMENT_PATTERN, true )
+
+ # NOTE(review): md[1] is read before the `if md` guard a few lines down,
+ # so a failed COMMENT_PATTERN match raises NoMethodError here (which the
+ # rescue at the bottom wraps in a ParseException).
+ case md[1]
+ when /--/, /-\z/
+ raise REXML::ParseException.new("Malformed comment", @source)
+ end
+
+ return [ :comment, md[1] ] if md
+ else
+ md = @source.match( CDATA_PATTERN, true )
+ return [ :cdata, md[1] ] if md
+ end
+ raise REXML::ParseException.new( "Declarations can only occur "+
+ "in the doctype declaration.", @source)
+ elsif @source.buffer[1] == ??
+ md = @source.match( INSTRUCTION_PATTERN, true )
+ return [ :processing_instruction, md[1], md[2] ] if md
+ raise REXML::ParseException.new( "Bad instruction declaration",
+ @source)
+ else
+ # Get the next tag
+ # TAG_MATCH groups: 1 = full name, 2 = prefix, 4 = attribute text,
+ # 6 = "/" when the tag is self-closing.
+ md = @source.match(TAG_MATCH, true)
+ unless md
+ # Check for missing attribute quotes
+ raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
+ raise REXML::ParseException.new("malformed XML: missing tag start", @source)
+ end
+ attributes = {}
+ # Prefixes used by the tag and its attributes; each must be declared.
+ prefixes = Set.new
+ prefixes << md[2] if md[2]
+ # New namespace scope holding the prefixes this element declares.
+ @nsstack.unshift(curr_ns=Set.new)
+ if md[4].size > 0
+ attrs = md[4].scan( ATTRIBUTE_PATTERN )
+ # $' is the text left after the last attribute match; anything other
+ # than whitespace there means the attribute list could not be parsed.
+ raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
+ attrs.each do |attr_name, prefix, local_part, quote, value|
+ if prefix == "xmlns"
+ if local_part == "xml"
+ if value != "http://www.w3.org/XML/1998/namespace"
+ msg = "The 'xml' prefix must not be bound to any other namespace "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self )
+ end
+ elsif local_part == "xmlns"
+ msg = "The 'xmlns' prefix must not be declared "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self)
+ end
+ curr_ns << local_part
+ elsif prefix
+ prefixes << prefix unless prefix == "xml"
+ end
+
+ if attributes.has_key?(attr_name)
+ msg = "Duplicate attribute #{attr_name.inspect}"
+ raise REXML::ParseException.new(msg, @source, self)
+ end
+
+ attributes[attr_name] = value
+ end
+ end
+
+ # Verify that all of the prefixes have been defined
+ for prefix in prefixes
+ unless @nsstack.find{|k| k.member?(prefix)}
+ raise UndefinedNamespaceException.new(prefix,@source,self)
+ end
+ end
+
+ if md[6]
+ # Self-closing: remember the name so the next call emits :end_element,
+ # and drop the namespace scope that was just opened.
+ @closed = md[1]
+ @nsstack.shift
+ else
+ @tags.push( md[1] )
+ end
+ return [ :start_element, md[1], attributes ]
+ end
+ else
+ # Character data up to (not including) the next "<".
+ md = @source.match( TEXT_PATTERN, true )
+ if md[0].length == 0
+ @source.match( /(\s+)/, true )
+ end
+ #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
+ #return [ :text, "" ] if md[0].length == 0
+ # unnormalized = Text::unnormalize( md[1], self )
+ # return PullEvent.new( :text, md[1], unnormalized )
+ return [ :text, md[1] ]
+ end
+ rescue REXML::UndefinedNamespaceException
+ raise
+ rescue REXML::ParseException
+ raise
+ # Everything else is re-raised as a ParseException that carries the
+ # original exception.
+ rescue Exception, NameError => error
+ raise REXML::ParseException.new( "Exception parsing",
+ @source, self, (error ? error : $!) )
+ end
+ # NOTE(review): every branch of the begin block above appears to return or
+ # raise, so this line looks unreachable — confirm.
+ return [ :dummy ]
+ end
+ private :pull_event
+
+ # Resolves the entity named +reference+: first in +entities+ (when given),
+ # then among the built-in defaults. The value found is passed through
+ # unnormalize; nil is returned when the name is unknown.
+ def entity( reference, entities )
+   value = entities ? entities[ reference ] : nil
+   unless value
+     builtin = DEFAULT_ENTITIES[ reference ]
+     value = builtin[2] if builtin
+   end
+   unnormalize( value, entities ) if value
+ end
+
+ # Escapes all possible entities.
+ #
+ # input::         the text to escape; it is not modified
+ # entities::      optional hash of entity name => replacement text; each
+ #                 occurrence of the text is replaced by "&name;"
+ # entity_filter:: optional collection of entity names that must NOT be
+ #                 substituted
+ #
+ # Returns a new, escaped String.
+ def normalize( input, entities=nil, entity_filter=nil )
+   # dup (not clone) so that a frozen input still yields a mutable copy.
+   copy = input.dup
+   # Doing it like this rather than in a loop improves the speed
+   copy.gsub!( EREFERENCE, '&amp;' )
+   entities.each do |key, value|
+     # The filter is checked against the entity's name. (This used to call
+     # the two-argument #entity method with no arguments, which raised
+     # ArgumentError whenever a filter was supplied.)
+     next if entity_filter and entity_filter.include?( key )
+     copy.gsub!( value, "&#{key};" )
+   end if entities
+   copy.gsub!( EREFERENCE, '&amp;' )
+   # Finally escape the characters covered by the built-in entities.
+   DEFAULT_ENTITIES.each do |key, value|
+     copy.gsub!( value[3], value[1] )
+   end
+   copy
+ end
+
+ # Unescapes all possible entities
+ #
+ # string::   the text to unescape; a copy is modified and returned
+ # entities:: optional lookup of entity name => value, consulted through #entity
+ # filter::   optional collection of entity names that must be left untouched
+ def unnormalize( string, entities=nil, filter=nil )
+ rv = string.clone
+ # Normalise line endings: "\r\n" and a lone "\r" both become "\n".
+ rv.gsub!( /\r\n?/, "\n" )
+ matches = rv.scan( REFERENCE_RE )
+ # No references at all: nothing further to do.
+ return rv if matches.size == 0
+ # Replace numeric character references (decimal or &#x hex) with the
+ # character they denote.
+ rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
+ m=$1
+ m = "0#{m}" if m[0] == ?x
+ [Integer(m)].pack('U*')
+ }
+ # scan returned one capture array per reference; keep the captured names and
+ # drop the nil entries.
+ matches.collect!{|x|x[0]}.compact!
+ if matches.size > 0
+ matches.each do |entity_reference|
+ unless filter and filter.include?(entity_reference)
+ entity_value = entity( entity_reference, entities )
+ if entity_value
+ re = /&#{entity_reference};/
+ rv.gsub!( re, entity_value )
+ else
+ # Fallback to the built-in entities when #entity returned nothing.
+ er = DEFAULT_ENTITIES[entity_reference]
+ rv.gsub!( er[0], er[2] ) if er
+ end
+ end
+ end
+ # "&amp;" is expanded last.
+ rv.gsub!( /&amp;/, '&' )
+ end
+ rv
+ end
+
+ private
+ def need_source_encoding_update?(xml_declaration_encoding)
+   # Only a declared encoding other than plain "UTF-16" (any letter case)
+   # calls for an update.
+   !xml_declaration_encoding.nil? &&
+     /\AUTF-16\z/i !~ xml_declaration_encoding
+ end
+ end
+ end
+end
+
+=begin
+ case event[0]
+ when :start_element
+ when :text
+ when :end_element
+ when :processing_instruction
+ when :cdata
+ when :comment
+ when :xmldecl
+ when :start_doctype
+ when :end_doctype
+ when :externalentity
+ when :elementdecl
+ when :entitydecl
+ when :attlistdecl
+ when :notationdecl
+ when :end_doctype
+ end
+=end
diff --git a/jni/ruby/lib/rexml/parsers/lightparser.rb b/jni/ruby/lib/rexml/parsers/lightparser.rb
new file mode 100644
index 0000000..8104168
--- /dev/null
+++ b/jni/ruby/lib/rexml/parsers/lightparser.rb
@@ -0,0 +1,58 @@
+require 'rexml/parsers/streamparser'
+require 'rexml/parsers/baseparser'
+require 'rexml/light/node'
+
+module REXML
+ module Parsers
+ class LightParser
+   # Keeps the raw stream (needed for #rewind) and wraps it in a BaseParser.
+   def initialize stream
+     @stream = stream
+     @parser = REXML::Parsers::BaseParser.new( stream )
+   end
+
+   # Forwards +listener+ to the underlying BaseParser.
+   def add_listener( listener )
+     @parser.add_listener( listener )
+   end
+
+   # Rewinds the stream and hands it to the parser again.
+   def rewind
+     @stream.rewind
+     @parser.stream = @stream
+   end
+
+   # Reads events until :end_document and returns the root array
+   # [ :document, ... ]. Each event array is appended to the node currently
+   # being built and gets that node spliced in at index 1. Start events
+   # (:start_element, :start_doctype) become the current node; end events
+   # step back to the parent.
+   def parse
+     root = context = [ :document ]
+     loop do
+       event = @parser.pull
+       type = event[0]
+       break if type == :end_document
+       if type == :end_element or type == :end_doctype
+         context = context[1]
+         next
+       end
+       context << event
+       event[1,0] = [context]
+       context = event if type == :start_element or type == :start_doctype
+     end
+     root
+   end
+ end
+
+ # Every node in the tree built by LightParser#parse is the parser's event
+ # array with its parent spliced in at index 1. For an element:
+ # 0 The event type (:start_element)
+ # 1 The parent node
+ # 2 The tag name
+ # 3 A hash of attributes
+ # 4..-1 The child nodes
+ # Text is [ :text, parent, string ]
+ # PIs are [ :processing_instruction, parent, target, data ]
+ # Comments are [ :comment, parent, data ]
+ # DocTypes are [ :start_doctype, parent, ... ] with their declarations as children
+ # The root is the array [ :document, child, ... ]; it has no parent entry
+ end
+end
diff --git a/jni/ruby/lib/rexml/parsers/pullparser.rb b/jni/ruby/lib/rexml/parsers/pullparser.rb
new file mode 100644
index 0000000..68a4ff7
--- /dev/null
+++ b/jni/ruby/lib/rexml/parsers/pullparser.rb
@@ -0,0 +1,196 @@
+require 'forwardable'
+
+require 'rexml/parseexception'
+require 'rexml/parsers/baseparser'
+require 'rexml/xmltokens'
+
+module REXML
+ module Parsers
+ # = Using the Pull Parser
+ # <em>This API is experimental, and subject to change.</em>
+ # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+ # while parser.has_next?
+ # res = parser.next
+ # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
+ # end
+ # See the PullEvent class for information on the content of the results.
+ # The data is identical to the arguments passed for the various events to
+ # the StreamListener API.
+ #
+ # Notice that:
+ # parser = PullParser.new( "<a>BAD DOCUMENT" )
+ # while parser.has_next?
+ # res = parser.next
+ # raise res[1] if res.error?
+ # end
+ #
+ # Nat Price gave me some good ideas for the API.
+ class PullParser
+   include XMLTokens
+   extend Forwardable
+
+   def_delegators( :@parser, :has_next? )
+   def_delegators( :@parser, :entity )
+   def_delegators( :@parser, :empty? )
+   def_delegators( :@parser, :source )
+
+   # Wraps +stream+ in a BaseParser. @entities collects the internal
+   # entities declared so far; @my_stack holds events that were peeked at or
+   # pushed back.
+   def initialize stream
+     @entities = {}
+     @listeners = nil
+     @parser = BaseParser.new( stream )
+     @my_stack = []
+   end
+
+   # Stores +listener+ on this object.
+   # NOTE(review): nothing in this class reads @listeners afterwards.
+   def add_listener( listener )
+     @listeners = [] unless @listeners
+     @listeners << listener
+   end
+
+   # Yields events one by one for as long as has_next? is true.
+   def each
+     while has_next?
+       yield self.pull
+     end
+   end
+
+   # Returns the event +depth+ places ahead (0 is the next one) without
+   # consuming it. Events read ahead go through the same processing as in
+   # #pull, so a peeked :text event already carries its unnormalized text
+   # and peeked entity declarations are recorded. (Previously peeked events
+   # were wrapped raw and later handed out by #pull in that state.)
+   def peek depth=0
+     @my_stack.push( next_event ) while @my_stack.length <= depth
+     @my_stack[depth]
+   end
+
+   # Returns the next event as a PullEvent, taking peeked or pushed-back
+   # events first.
+   def pull
+     return @my_stack.shift if @my_stack.length > 0
+     next_event
+   end
+
+   # Puts +token+ back so that it is the next event returned by #pull.
+   def unshift token
+     @my_stack.unshift token
+   end
+
+   private
+
+   # Reads one event from the underlying parser, records internal entity
+   # declarations, appends the unnormalized text to :text events, and wraps
+   # the result in a PullEvent.
+   def next_event
+     event = @parser.pull
+     case event[0]
+     when :entitydecl
+       @entities[ event[1] ] =
+         event[2] unless event[2] =~ /PUBLIC|SYSTEM/
+     when :text
+       event << @parser.unnormalize( event[1], @entities )
+     end
+     PullEvent.new( event )
+   end
+ end
+
+ # A parsing event. The contents of the event are accessed as an +Array+,
+ # and the type is given either by the ...? methods, or by calling
+ # +event_type+. The contents of this object vary from event to event,
+ # but are identical to the arguments passed to +StreamListener+s for each
+ # event.
+ class PullEvent
+   # +arg+ is the raw event array: element 0 is the event-type symbol, the
+   # remaining elements are the event's contents. The predicates below test
+   # for :start_element, :end_element, :text, :processing_instruction,
+   # :comment, :start_doctype, :attlistdecl, :elementdecl, :entitydecl,
+   # :notationdecl, :entity, :cdata, :xmldecl and :error.
+   def initialize(arg)
+     @contents = arg
+   end
+
+   # Array-style access to the event contents; the type symbol is not part
+   # of the indexed data, so index 0 is the first content item.
+   #
+   #   event[i]        the i-th content item
+   #   event[i, len]   +len+ items starting at +i+
+   #   event[range]    the items selected by +range+
+   #
+   # Indices follow Array#slice semantics on the contents, including
+   # negative indices and exclusive ranges. (Previously only the start of a
+   # range was shifted past the type symbol, so event[0..1] returned a
+   # single item, and a negative index could return the type symbol.)
+   # Raises RuntimeError for any other kind of argument.
+   def []( start, endd=nil)
+     payload = @contents[1..-1] || []
+     if start.kind_of? Range
+       payload.slice( start )
+     elsif start.kind_of? Numeric
+       endd.nil? ? payload.slice( start ) : payload.slice( start, endd )
+     else
+       raise "Illegal argument #{start.inspect} (#{start.class})"
+     end
+   end
+
+   # The event-type symbol (element 0 of the raw event).
+   def event_type
+     @contents[0]
+   end
+
+   # Content: [ String tag_name, Hash attributes ]
+   def start_element?
+     @contents[0] == :start_element
+   end
+
+   # Content: [ String tag_name ]
+   def end_element?
+     @contents[0] == :end_element
+   end
+
+   # Content: [ String raw_text, String unnormalized_text ]
+   def text?
+     @contents[0] == :text
+   end
+
+   # Content: [ String text ]
+   def instruction?
+     @contents[0] == :processing_instruction
+   end
+
+   # Content: [ String text ]
+   def comment?
+     @contents[0] == :comment
+   end
+
+   # Content: [ String name, String pub_sys, String long_name, String uri ]
+   def doctype?
+     @contents[0] == :start_doctype
+   end
+
+   # Content: [ String text ]
+   def attlistdecl?
+     @contents[0] == :attlistdecl
+   end
+
+   # Content: [ String text ]
+   def elementdecl?
+     @contents[0] == :elementdecl
+   end
+
+   # Due to the wonders of DTDs, an entity declaration can be just about
+   # anything. There's no way to normalize it; you'll have to interpret the
+   # content yourself. However, the following is true:
+   #
+   # * If the entity declaration is an internal entity:
+   #   [ String name, String value ]
+   # Content: [ String text ]
+   def entitydecl?
+     @contents[0] == :entitydecl
+   end
+
+   # Content: [ String text ]
+   def notationdecl?
+     @contents[0] == :notationdecl
+   end
+
+   # Content: [ String text ]
+   def entity?
+     @contents[0] == :entity
+   end
+
+   # Content: [ String text ]
+   def cdata?
+     @contents[0] == :cdata
+   end
+
+   # Content: [ String version, String encoding, String standalone ]
+   def xmldecl?
+     @contents[0] == :xmldecl
+   end
+
+   def error?
+     @contents[0] == :error
+   end
+
+   # "type: [contents...]"
+   def inspect
+     @contents[0].to_s + ": " + @contents[1..-1].inspect
+   end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/parsers/sax2parser.rb b/jni/ruby/lib/rexml/parsers/sax2parser.rb
new file mode 100644
index 0000000..a72c0a7
--- /dev/null
+++ b/jni/ruby/lib/rexml/parsers/sax2parser.rb
@@ -0,0 +1,272 @@
+require 'rexml/parsers/baseparser'
+require 'rexml/parseexception'
+require 'rexml/namespace'
+require 'rexml/text'
+
+module REXML
+ module Parsers
+ # SAX2Parser
+ class SAX2Parser
+ def initialize source
+   # The BaseParser does the tokenising; everything else is bookkeeping for
+   # observers, namespaces, open tags and declared entities.
+   @parser = BaseParser.new(source)
+   @listeners, @procs = [], []
+   @namespace_stack, @tag_stack = [], []
+   @entities = {}
+   @has_listeners = false
+ end
+
+ # Returns the source object held by the wrapped BaseParser.
+ def source
+ @parser.source
+ end
+
+ # Hands +listener+ to the wrapped BaseParser's add_listener. This is
+ # separate from the observers registered through #listen, which are kept
+ # in @procs and @listeners.
+ def add_listener( listener )
+ @parser.add_listener( listener )
+ end
+
+ # Listen arguments:
+ #
+ # Symbol, Array, Block
+ #   Listen to Symbol events on Array elements
+ # Symbol, Block
+ #   Listen to Symbol events
+ # Array, Listener
+ #   Listen to all events on Array elements
+ # Array, Block
+ #   Listen to :start_element events on Array elements
+ # Listener
+ #   Listen to All events
+ #
+ # Symbol can be one of: :start_element, :end_element,
+ # :start_prefix_mapping, :end_prefix_mapping, :characters,
+ # :processing_instruction, :doctype, :attlistdecl, :elementdecl,
+ # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
+ #
+ # There is an additional symbol that can be listened for: :progress.
+ # This will be called for every event generated, passing in the current
+ # stream position.
+ #
+ # Array contains regular expressions or strings which will be matched
+ # against fully qualified element names.
+ #
+ # Listener must implement the methods in SAX2Listener
+ #
+ # Block will be passed the same arguments as a SAX2Listener method would
+ # be, where the method name is the same as the matched Symbol.
+ # See the SAX2Listener for more information.
+ def listen( *args, &blok )
+   head = args[0]
+   two_args = ( args.size == 2 )
+   case head
+   when Symbol
+     if two_args
+       # Symbol + Array: one entry per element matcher, stored directly.
+       args[1].each { |match| @procs << [head, match, blok] }
+     else
+       add( [head, nil, blok] )
+     end
+   when Array
+     if two_args
+       head.each { |match| add( [nil, match, args[1]] ) }
+     else
+       head.each { |match| add( [ :start_element, match, blok ] ) }
+     end
+   else
+     add([nil, nil, head])
+   end
+ end
+
+ # Unregisters an observer: with a +listener+ argument every entry for that
+ # listener is removed; otherwise every entry for the given block is.
+ def deafen( listener=nil, &blok )
+   return @procs.delete_if { |entry| entry.last == blok } unless listener
+   @listeners.delete_if { |entry| entry.last == listener }
+   @has_listeners = false if @listeners.empty?
+ end
+
+ # Pulls events from the underlying parser until :end_document and reports
+ # each one to the blocks (@procs) and listener objects (@listeners)
+ # registered for it. A :progress notification carrying the parser position
+ # follows every event other than :end_document.
+ def parse
+ # Announce the start of the document to observers that asked for it.
+ @procs.each { |sym,match,block| block.call if sym == :start_document }
+ @listeners.each { |sym,match,block|
+ block.start_document if sym == :start_document or sym.nil?
+ }
+ # NOTE(review): context is only touched in the :end_doctype branch below
+ # and never read otherwise — looks like a leftover.
+ context = []
+ while true
+ event = @parser.pull
+ case event[0]
+ when :end_document
+ handle( :end_document )
+ break
+ when :start_doctype
+ handle( :doctype, *event[1..-1])
+ when :end_doctype
+ context = context[1]
+ when :start_element
+ @tag_stack.push(event[1])
+ # find the observers for namespaces
+ procs = get_procs( :start_prefix_mapping, event[1] )
+ listeners = get_listeners( :start_prefix_mapping, event[1] )
+ # NOTE(review): @namespace_stack is pushed only inside this `if`, i.e. when
+ # somebody observes :start_prefix_mapping, while the :end_element branch
+ # always pops it — confirm this imbalance is intended.
+ if procs or listeners
+ # break out the namespace declarations
+ # The attributes live in event[2]
+ event[2].each {|n, v| event[2][n] = @parser.normalize(v)}
+ nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
+ # Keep what follows "xmlns:" as the prefix; for a bare "xmlns" attribute
+ # the slice yields nil.
+ nsdecl.collect! { |n, value| [ n[6..-1], value ] }
+ @namespace_stack.push({})
+ nsdecl.each do |n,v|
+ @namespace_stack[-1][n] = v
+ # notify observers of namespaces
+ procs.each { |ob| ob.call( n, v ) } if procs
+ listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
+ end
+ end
+ # Split the qualified name into prefix and local part.
+ event[1] =~ Namespace::NAMESPLIT
+ prefix = $1
+ local = $2
+ uri = get_namespace(prefix)
+ # find the observers for start_element
+ procs = get_procs( :start_element, event[1] )
+ listeners = get_listeners( :start_element, event[1] )
+ # notify observers
+ procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
+ listeners.each { |ob|
+ ob.start_element( uri, local, event[1], event[2] )
+ } if listeners
+ when :end_element
+ @tag_stack.pop
+ event[1] =~ Namespace::NAMESPLIT
+ prefix = $1
+ local = $2
+ uri = get_namespace(prefix)
+ # find the observers for start_element
+ procs = get_procs( :end_element, event[1] )
+ listeners = get_listeners( :end_element, event[1] )
+ # notify observers
+ procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
+ listeners.each { |ob|
+ ob.end_element( uri, local, event[1] )
+ } if listeners
+
+ # The mappings declared by this element go out of scope.
+ namespace_mapping = @namespace_stack.pop
+ # find the observers for namespaces
+ procs = get_procs( :end_prefix_mapping, event[1] )
+ listeners = get_listeners( :end_prefix_mapping, event[1] )
+ if procs or listeners
+ namespace_mapping.each do |ns_prefix, ns_uri|
+ # notify observers of namespaces
+ procs.each { |ob| ob.call( ns_prefix ) } if procs
+ listeners.each { |ob| ob.end_prefix_mapping(ns_prefix) } if listeners
+ end
+ end
+ when :text
+ #normalized = @parser.normalize( event[1] )
+ #handle( :characters, normalized )
+ copy = event[1].clone
+
+ # Replaces a reference to a recorded entity with its value, expanding
+ # references inside that value as well; unknown references are kept.
+ esub = proc { |match|
+ if @entities.has_key?($1)
+ @entities[$1].gsub(Text::REFERENCE, &esub)
+ else
+ match
+ end
+ }
+
+ copy.gsub!( Text::REFERENCE, &esub )
+ # Then turn numeric character references into the characters themselves.
+ copy.gsub!( Text::NUMERICENTITY ) {|m|
+ m=$1
+ m = "0#{m}" if m[0] == ?x
+ [Integer(m)].pack('U*')
+ }
+ handle( :characters, copy )
+ when :entitydecl
+ handle_entitydecl( event )
+ when :processing_instruction, :comment, :attlistdecl,
+ :elementdecl, :cdata, :notationdecl, :xmldecl
+ # These are passed through under their own event name.
+ handle( *event )
+ end
+ handle( :progress, @parser.position )
+ end
+ end
+
+ private
+ # Delivers one event to every observer registered for +symbol+ in the
+ # scope of the innermost open tag: blocks are called with +arguments+,
+ # listener objects receive a call to the method named after +symbol+.
+ def handle( symbol, *arguments )
+   current_tag = @tag_stack[-1]
+   blocks = get_procs( symbol, current_tag )
+   objects = get_listeners( symbol, current_tag )
+   # notify observers
+   blocks.each { |blk| blk.call( *arguments ) } if blocks
+   objects.each { |obj| obj.send( symbol.to_s, *arguments ) } if objects
+ end
+
+ # Records internal entities for later expansion in text, reshapes the
+ # :entitydecl event array in place, and passes it on to the observers.
+ def handle_entitydecl( event )
+ # Exactly three elements means [ :entitydecl, name, value ].
+ @entities[ event[1] ] = event[2] if event.size == 3
+ parameter_reference_p = false
+ case event[2]
+ when "SYSTEM"
+ # With five elements the last one is either the "%" marker or a further
+ # value, in front of which the literal "NDATA" is inserted.
+ if event.size == 5
+ if event.last == "%"
+ parameter_reference_p = true
+ else
+ event[4, 0] = "NDATA"
+ end
+ end
+ when "PUBLIC"
+ # Same as above, one position further along.
+ if event.size == 6
+ if event.last == "%"
+ parameter_reference_p = true
+ else
+ event[5, 0] = "NDATA"
+ end
+ end
+ else
+ # Neither SYSTEM nor PUBLIC: a fourth element marks a parameter entity.
+ parameter_reference_p = (event.size == 4)
+ end
+ # Move the trailing marker to the front of the contents.
+ event[1, 0] = event.pop if parameter_reference_p
+ # The contents are passed as one Array argument, not splatted.
+ handle( event[0], event[1..-1] )
+ end
+
+ # The following methods are duplicates, but it is faster than using
+ # a helper
+ #
+ # Returns the blocks registered for +symbol+ whose matcher accepts +name+,
+ # or nil when no blocks are registered at all. An entry matches when its
+ # symbol is nil or equal to +symbol+, and its matcher is nil, equal to
+ # +name+, or a Regexp that matches +name+.
+ def get_procs( symbol, name )
+   return nil if @procs.empty?
+   hits = @procs.select do |sym, match, _block|
+     next false unless sym.nil? or symbol == sym
+     match.nil? or name == match or
+       ( match.kind_of?( Regexp ) and name =~ match )
+   end
+   hits.map { |entry| entry[-1] }
+ end
+ # Returns the listener objects registered for +symbol+ whose matcher
+ # accepts +name+, or nil when no listeners are registered at all. An entry
+ # matches when its symbol is nil or equal to +symbol+, and its matcher is
+ # nil, equal to +name+, or a Regexp that matches +name+.
+ def get_listeners( symbol, name )
+   return nil if @listeners.empty?
+   hits = @listeners.select do |sym, match, _listener|
+     next false unless sym.nil? or symbol == sym
+     match.nil? or name == match or
+       ( match.kind_of?( Regexp ) and name =~ match )
+   end
+   hits.map { |entry| entry[-1] }
+ end
+
+ # Files an observer entry under @procs when its last element responds to
+ # +call+, otherwise under @listeners. Entries already present are not
+ # added a second time.
+ def add( pair )
+   unless pair.last.respond_to?( :call )
+     @listeners.push( pair ) unless @listeners.include?( pair )
+     return @has_listeners = true
+   end
+   @procs.push( pair ) unless @procs.include?( pair )
+ end
+
+ # Returns the URI bound to +prefix+ by the innermost enclosing scope on
+ # @namespace_stack, or nil when no scope binds it. A nil +prefix+ looks up
+ # the entry stored under the nil key.
+ #
+ # The earlier version collected all matching scopes with find_all and had
+ # an `||` fallback; since find_all always returns an Array that fallback
+ # could never run. Results are unchanged.
+ def get_namespace( prefix )
+   @namespace_stack.reverse_each do |scope|
+     uri = scope[prefix]
+     return uri unless uri.nil?
+   end
+   nil
+ end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/parsers/streamparser.rb b/jni/ruby/lib/rexml/parsers/streamparser.rb
new file mode 100644
index 0000000..9ea65ed
--- /dev/null
+++ b/jni/ruby/lib/rexml/parsers/streamparser.rb
@@ -0,0 +1,52 @@
+require "rexml/parsers/baseparser"
+
+module REXML
+ module Parsers
+ class StreamParser
+   # +listener+ receives the stream callbacks; +source+ is wrapped in a
+   # BaseParser.
+   def initialize source, listener
+     @listener = listener
+     @parser = BaseParser.new( source )
+   end
+
+   # Forwards +listener+ to the underlying BaseParser.
+   def add_listener( listener )
+     @parser.add_listener( listener )
+   end
+
+   # Pulls events until :end_document and translates each one into the
+   # corresponding call on the listener. Attribute values and text are
+   # unnormalized before being handed over.
+   def parse
+     # entity string
+     until (event = @parser.pull)[0] == :end_document
+       kind = event[0]
+       rest = event[1..-1]
+       case kind
+       when :start_element
+         attributes = event[2]
+         attributes.each_key do |name|
+           attributes[name] = @parser.unnormalize( attributes[name] )
+         end
+         @listener.tag_start( event[1], attributes )
+       when :end_element
+         @listener.tag_end( event[1] )
+       when :text
+         @listener.text( @parser.unnormalize( event[1] ) )
+       when :processing_instruction
+         @listener.instruction( *event[1,2] )
+       when :start_doctype
+         @listener.doctype( *rest )
+       when :end_doctype
+         # FIXME: remove this condition for milestone:3.2
+         @listener.doctype_end if @listener.respond_to? :doctype_end
+       when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
+         # The listener method has the same name as the event.
+         @listener.send( kind.to_s, *rest )
+       when :entitydecl, :notationdecl
+         # These two receive their contents as a single Array.
+         @listener.send( kind.to_s, rest )
+       when :externalentity
+         # Strip the leading "%" and trailing ";" from the reference.
+         @listener.entity( event[1].gsub(/\A%|;\z/, "") )
+       end
+     end
+   end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/parsers/treeparser.rb b/jni/ruby/lib/rexml/parsers/treeparser.rb
new file mode 100644
index 0000000..68edb77
--- /dev/null
+++ b/jni/ruby/lib/rexml/parsers/treeparser.rb
@@ -0,0 +1,100 @@
+require 'rexml/validation/validationexception'
+require 'rexml/undefinednamespaceexception'
+
+module REXML
+ module Parsers
+ # Builds a node tree by pulling events from a BaseParser and adding the
+ # corresponding objects to a build context.
+ class TreeParser
+ # @param source passed unchanged to Parsers::BaseParser.new
+ # @param build_context the node that parsed content is added to
+ # (a new Document unless one is supplied)
+ def initialize( source, build_context = Document.new )
+ @build_context = build_context
+ @parser = Parsers::BaseParser.new( source )
+ end
+
+ # Registers +listener+ on the underlying BaseParser.
+ def add_listener( listener )
+ @parser.add_listener( listener )
+ end
+
+ # Consumes events until :end_document, growing the tree under the build
+ # context. ValidationException and ParseException propagate unchanged;
+ # any other error is re-raised wrapped in a ParseException.
+ def parse
+ # Names of the elements that have been opened but not yet closed.
+ tag_stack = []
+ in_doctype = false
+ entities = nil
+ begin
+ while true
+ event = @parser.pull
+ #STDERR.puts "TREEPARSER GOT #{event.inspect}"
+ case event[0]
+ when :end_document
+ unless tag_stack.empty?
+ raise ParseException.new("No close tag for #{@build_context.xpath}",
+ @parser.source, @parser)
+ end
+ return
+ when :start_element
+ tag_stack.push(event[1])
+ # Descend: the newly added element becomes the build context.
+ el = @build_context = @build_context.add_element( event[1] )
+ event[2].each do |key, value|
+ el.attributes[key]=Attribute.new(key,value,self)
+ end
+ when :end_element
+ tag_stack.pop
+ # Ascend back to the parent of the element just closed.
+ @build_context = @build_context.parent
+ when :text
+ # Text events are dropped while a doctype is open.
+ if not in_doctype
+ if @build_context[-1].instance_of? Text
+ # Append to the Text node that is already the last child.
+ @build_context[-1] << event[1]
+ else
+ @build_context.add(
+ Text.new(event[1], @build_context.whitespace, nil, true)
+ ) unless (
+ @build_context.ignore_whitespace_nodes and
+ event[1].strip.size==0
+ )
+ end
+ end
+ when :comment
+ c = Comment.new( event[1] )
+ @build_context.add( c )
+ when :cdata
+ c = CData.new( event[1] )
+ @build_context.add( c )
+ when :processing_instruction
+ @build_context.add( Instruction.new( event[1], event[2] ) )
+ when :end_doctype
+ in_doctype = false
+ # Overwrite each collected value with the one the doctype reports.
+ # NOTE(review): the local entities hash is not read anywhere else
+ # in this method - confirm whether this pass is still needed.
+ entities.each { |k,v| entities[k] = @build_context.entities[k].value }
+ @build_context = @build_context.parent
+ when :start_doctype
+ doctype = DocType.new( event[1..-1], @build_context )
+ # Declarations that follow are added to the doctype.
+ @build_context = doctype
+ entities = {}
+ in_doctype = true
+ when :attlistdecl
+ n = AttlistDecl.new( event[1..-1] )
+ @build_context.add( n )
+ when :externalentity
+ n = ExternalEntity.new( event[1] )
+ @build_context.add( n )
+ when :elementdecl
+ n = ElementDecl.new( event[1] )
+ @build_context.add(n)
+ when :entitydecl
+ # Record the declared value unless it mentions PUBLIC or SYSTEM.
+ entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
+ @build_context.add(Entity.new(event))
+ when :notationdecl
+ n = NotationDecl.new( *event[1..-1] )
+ @build_context.add( n )
+ when :xmldecl
+ x = XMLDecl.new( event[1], event[2], event[3] )
+ @build_context.add( x )
+ end
+ end
+ rescue REXML::Validation::ValidationException
+ raise
+ rescue REXML::ParseException
+ raise
+ rescue
+ # Anything else is reported as a ParseException carrying the original error.
+ raise ParseException.new( $!.message, @parser.source, @parser, $! )
+ end
+ end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/parsers/ultralightparser.rb b/jni/ruby/lib/rexml/parsers/ultralightparser.rb
new file mode 100644
index 0000000..4e2d7a8
--- /dev/null
+++ b/jni/ruby/lib/rexml/parsers/ultralightparser.rb
@@ -0,0 +1,56 @@
+require 'rexml/parsers/streamparser'
+require 'rexml/parsers/baseparser'
+
+module REXML
+ module Parsers
+ # Turns the event stream of a BaseParser into nested plain arrays.
+ class UltraLightParser
+ # Keeps +stream+ (needed by #rewind) and wraps it in a BaseParser.
+ def initialize stream
+ @stream = stream
+ @parser = REXML::Parsers::BaseParser.new( stream )
+ end
+
+ # Registers +listener+ on the underlying BaseParser.
+ def add_listener( listener )
+ @parser.add_listener( listener )
+ end
+
+ # Rewinds the stream and hands it to the parser again.
+ def rewind
+ @stream.rewind
+ @parser.stream = @stream
+ end
+
+ # Pulls events until :end_document and returns the root array.
+ # Container events (:start_element, :start_doctype) get their parent
+ # array inserted at index 1 and then become the current context; all
+ # other events are appended to the current context unchanged.
+ def parse
+ root = context = []
+ while true
+ event = @parser.pull
+ case event[0]
+ when :end_document
+ break
+ when :end_doctype
+ # Index 1 of a container holds its parent (inserted below).
+ context = context[1]
+ when :start_element, :start_doctype
+ context << event
+ # Splice the parent in at index 1 so the closing event can climb back.
+ event[1,0] = [context]
+ context = event
+ when :end_element
+ context = context[1]
+ else
+ context << event
+ end
+ end
+ root
+ end
+ end
+
+ # Layout of the structure returned by UltraLightParser#parse.
+ # An element is the :start_element event array with its parent spliced in:
+ # 0 :start_element
+ # 1 The parent array
+ # 2 The tag name
+ # 3 A hash of attributes
+ # 4..-1 The child nodes
+ # Every other node is the parser event array itself, for example:
+ # Text is [ :text, string ]
+ # PIs are [ :processing_instruction, target, data ]
+ # Comments are [ :comment, data ]
+ # DocTypes are :start_doctype event arrays, also with the parent at index 1
+ # The root is a plain array holding the top-level nodes
+ end
+end
diff --git a/jni/ruby/lib/rexml/parsers/xpathparser.rb b/jni/ruby/lib/rexml/parsers/xpathparser.rb
new file mode 100644
index 0000000..57767fb
--- /dev/null
+++ b/jni/ruby/lib/rexml/parsers/xpathparser.rb
@@ -0,0 +1,656 @@
+require 'rexml/namespace'
+require 'rexml/xmltokens'
+
+module REXML
+ module Parsers
+ # You don't want to use this class. Really. Use XPath, which is a wrapper
+ # for this class. Believe me. You don't want to poke around in here.
+ # There is strange, dark magic at work in this code. Beware. Go back! Go
+ # back while you still can!
+ class XPathParser
+ include XMLTokens
+ LITERAL = /^'([^']*)'|^"([^"]*)"/u
+
+ # Installs +namespaces+ as the Functions namespace context and also
+ # remembers it on this parser.
+ def namespaces=( namespaces )
+ Functions::namespace_context = namespaces
+ @namespaces = namespaces
+ end
+
+ # Parses the XPath string +path+ and returns the resulting token array.
+ # The work is done on a copy, so the caller's string is left untouched.
+ def parse path
+ path = path.dup
+ path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
+ path.gsub!( /\s+([\]\)])/, '\1') # ... and those before a closing ] or )
+ parsed = []
+ OrExpr(path, parsed)
+ parsed
+ end
+
+ # Parses +path+ as the body of a predicate by wrapping it in "[...]" and
+ # returns the resulting token array.
+ def predicate path
+ parsed = []
+ Predicate( "[#{path}]", parsed )
+ parsed
+ end
+
+ # Renders a parsed path in abbreviated XPath syntax.
+ # @param path an XPath String (parsed first) or an already parsed token
+ # array; a token array is consumed with shift and is empty afterwards
+ # @return the abbreviated path as a String
+ def abbreviate( path )
+ path = path.kind_of?(String) ? parse( path ) : path
+ string = ""
+ document = false
+ while path.size > 0
+ op = path.shift
+ case op
+ when :node
+ # node() adds nothing to the abbreviated form
+ when :attribute
+ string << "/" if string.size > 0
+ string << "@"
+ when :child
+ string << "/" if string.size > 0
+ when :descendant_or_self
+ string << "/"
+ when :self
+ string << "."
+ when :parent
+ string << ".."
+ when :any
+ string << "*"
+ when :text
+ string << "text()"
+ when :following, :following_sibling,
+ :ancestor, :ancestor_or_self, :descendant,
+ :namespace, :preceding, :preceding_sibling
+ # Axes without a short form are written out as "axis-name::"
+ string << "/" unless string.size == 0
+ string << op.to_s.tr("_", "-")
+ string << "::"
+ when :qname
+ # A qname token is followed by its prefix and its local name
+ prefix = path.shift
+ name = path.shift
+ string << prefix+":" if prefix.size > 0
+ string << name
+ when :predicate
+ string << '['
+ string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
+ string << ']'
+ when :document
+ # Remembered so that a leading "/" can be added at the end
+ document = true
+ when :function
+ string << path.shift
+ string << "( "
+ string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
+ string << " )"
+ when :literal
+ string << %Q{ "#{path.shift}" }
+ else
+ string << "/" unless string.size == 0
+ string << "UNKNOWN("
+ string << op.inspect
+ string << ")"
+ end
+ end
+ string = "/"+string if document
+ return string
+ end
+
+ # Renders a parsed path in unabbreviated form, writing every axis as
+ # "axis-name::".
+ # @param path an XPath String (parsed first) or an already parsed token
+ # array; a token array is consumed with shift and is empty afterwards
+ # @return the expanded path as a String
+ def expand( path )
+ path = path.kind_of?(String) ? parse( path ) : path
+ string = ""
+ document = false
+ while path.size > 0
+ op = path.shift
+ case op
+ when :node
+ string << "node()"
+ when :attribute, :child, :following, :following_sibling,
+ :ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
+ :namespace, :preceding, :preceding_sibling, :self, :parent
+ string << "/" unless string.size == 0
+ string << op.to_s.tr("_", "-")
+ string << "::"
+ when :any
+ string << "*"
+ when :qname
+ # A qname token is followed by its prefix and its local name
+ prefix = path.shift
+ name = path.shift
+ string << prefix+":" if prefix.size > 0
+ string << name
+ when :predicate
+ string << '['
+ string << predicate_to_string( path.shift ) { |x| expand(x) }
+ string << ']'
+ when :document
+ # Remembered so that a leading "/" can be added at the end
+ document = true
+ else
+ string << "/" unless string.size == 0
+ string << "UNKNOWN("
+ string << op.inspect
+ string << ")"
+ end
+ end
+ string = "/"+string if document
+ return string
+ end
+
+ # Renders a parsed predicate expression as text.
+ # Binary operators, function calls and literals are handled here; any
+ # other content is handed to the supplied block, whose result is used
+ # as its text. The +path+ array is consumed with shift.
+ # @return the rendered String, with runs of spaces squeezed to one
+ def predicate_to_string( path, &block )
+ string = ""
+ case path[0]
+ when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
+ op = path.shift
+ # Operators not listed below are printed by their symbol name.
+ case op
+ when :eq
+ op = "="
+ when :lt
+ op = "<"
+ when :gt
+ op = ">"
+ when :lteq
+ op = "<="
+ when :gteq
+ op = ">="
+ when :neq
+ op = "!="
+ when :union
+ op = "|"
+ end
+ left = predicate_to_string( path.shift, &block )
+ right = predicate_to_string( path.shift, &block )
+ string << " "
+ string << left
+ string << " "
+ string << op.to_s
+ string << " "
+ string << right
+ string << " "
+ when :function
+ path.shift
+ name = path.shift
+ string << name
+ string << "( "
+ string << predicate_to_string( path.shift, &block )
+ string << " )"
+ when :literal
+ path.shift
+ string << " "
+ string << path.shift.inspect
+ string << " "
+ else
+ # Not an operator, function or literal: let the caller's block render it.
+ string << " "
+ string << yield( path )
+ string << " "
+ end
+ return string.squeeze(" ")
+ end
+
+ private
+ #LocationPath
+ # | RelativeLocationPath
+ # | '/' RelativeLocationPath?
+ # | '//' RelativeLocationPath
+ # Handles an optional leading "/" or "//" and then delegates to
+ # RelativeLocationPath. Returns that call's result, or nil when nothing
+ # is left after the leading separator.
+ def LocationPath path, parsed
+ path = path.strip
+ if path[0] == ?/
+ parsed << :document
+ if path[1] == ?/
+ # "//" is recorded as descendant-or-self::node()
+ parsed << :descendant_or_self
+ parsed << :node
+ path = path[2..-1]
+ else
+ path = path[1..-1]
+ end
+ end
+ return RelativeLocationPath( path, parsed ) if path.size > 0
+ end
+
+ #RelativeLocationPath
+ # | Step
+ # | (AXIS_NAME '::' | '@' | '') AxisSpecifier
+ # NodeTest
+ # Predicate
+ # | '.' | '..' AbbreviatedStep
+ # | RelativeLocationPath '/' Step
+ # | RelativeLocationPath '//' Step
+ AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
+ # Parses a sequence of steps separated by "/" or "//", appending their
+ # tokens to +parsed+. Returns the part of +path+ that was not consumed.
+ def RelativeLocationPath path, parsed
+ while path.size > 0
+ # (axis or @ or <child::>) nodetest predicate >
+ # OR > / Step
+ # (. or ..) >
+ if path[0] == ?.
+ if path[1] == ?.
+ parsed << :parent
+ parsed << :node
+ path = path[2..-1]
+ else
+ parsed << :self
+ parsed << :node
+ path = path[1..-1]
+ end
+ else
+ if path[0] == ?@
+ parsed << :attribute
+ path = path[1..-1]
+ # Goto Nodetest
+ elsif path =~ AXIS
+ parsed << $1.tr('-','_').intern
+ path = $'
+ # Goto Nodetest
+ else
+ # No explicit axis: the step is on the child axis
+ parsed << :child
+ end
+
+ # Node test and predicate tokens are collected separately, then appended
+ n = []
+ path = NodeTest( path, n)
+
+ if path[0] == ?[
+ path = Predicate( path, n )
+ end
+
+ parsed.concat(n)
+ end
+
+ if path.size > 0
+ if path[0] == ?/
+ if path[1] == ?/
+ parsed << :descendant_or_self
+ parsed << :node
+ path = path[2..-1]
+ else
+ path = path[1..-1]
+ end
+ else
+ # Anything other than a step separator ends the location path
+ return path
+ end
+ end
+ end
+ return path
+ end
+
+ # Returns a 1-1 map of the nodeset
+ # The contents of the resulting array are either:
+ # true/false, if a positive match
+ # String, if a name match
+ #NodeTest
+ # | ('*' | NCNAME ':' '*' | QNAME) NameTest
+ # | NODE_TYPE '(' ')' NodeType
+ # | PI '(' LITERAL ')' PI
+ # | '[' expr ']' Predicate
+ NCNAMETEST= /^(#{NCNAME_STR}):\*/u
+ QNAME = Namespace::NAMESPLIT
+ NODE_TYPE = /^(comment|text|node)\(\s*\)/m
+ PI = /^processing-instruction\(/
+ def NodeTest path, parsed
+ case path
+ when /^\*/
+ path = $'
+ parsed << :any
+ when NODE_TYPE
+ type = $1
+ path = $'
+ parsed << type.tr('-', '_').intern
+ when PI
+ path = $'
+ literal = nil
+ if path !~ /^\s*\)/
+ path =~ LITERAL
+ literal = $1
+ path = $'
+ raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?)
+ path = path[1..-1]
+ end
+ parsed << :processing_instruction
+ parsed << (literal || '')
+ when NCNAMETEST
+ prefix = $1
+ path = $'
+ parsed << :namespace
+ parsed << prefix
+ when QNAME
+ prefix = $1
+ name = $2
+ path = $'
+ prefix = "" unless prefix
+ parsed << :qname
+ parsed << prefix
+ parsed << name
+ end
+ return path
+ end
+
+ # Filters the supplied nodeset on the predicate(s)
+ def Predicate path, parsed
+ return nil unless path[0] == ?[
+ predicates = []
+ while path[0] == ?[
+ path, expr = get_group(path)
+ predicates << expr[1..-2] if expr
+ end
+ predicates.each{ |pred|
+ preds = []
+ parsed << :predicate
+ parsed << preds
+ OrExpr(pred, preds)
+ }
+ path
+ end
+
+ # The following return arrays of true/false, a 1-1 mapping of the
+ # supplied nodeset, except for axe(), which returns a filtered
+ # nodeset
+
+ #| OrExpr S 'or' S AndExpr
+ #| AndExpr
+ # Parses AndExpr operands joined by " or " into nested
+ # [ :or, left, right ] arrays. The result replaces +parsed+ when
+ # +parsed+ is empty and is appended to it as one element otherwise.
+ # Returns the unparsed remainder.
+ def OrExpr path, parsed
+ n = []
+ rest = AndExpr( path, n )
+ if rest != path
+ # The operator is only recognised with a space on both sides
+ while rest =~ /^\s*( or )/
+ n = [ :or, n, [] ]
+ rest = AndExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| AndExpr S 'and' S EqualityExpr
+ #| EqualityExpr
+ # Parses EqualityExpr operands joined by " and " into nested
+ # [ :and, left, right ] arrays. The result replaces +parsed+ when
+ # +parsed+ is empty and is appended to it as one element otherwise.
+ # Returns the unparsed remainder.
+ def AndExpr path, parsed
+ n = []
+ rest = EqualityExpr( path, n )
+ if rest != path
+ # The operator is only recognised with a space on both sides
+ while rest =~ /^\s*( and )/
+ n = [ :and, n, [] ]
+ rest = EqualityExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| EqualityExpr ('=' | '!=') RelationalExpr
+ #| RelationalExpr
+ # Parses RelationalExpr operands joined by "=" or "!=" into nested
+ # [ :eq, left, right ] / [ :neq, left, right ] arrays. The result
+ # replaces +parsed+ when +parsed+ is empty and is appended to it as one
+ # element otherwise. Returns the unparsed remainder.
+ def EqualityExpr path, parsed
+ n = []
+ rest = RelationalExpr( path, n )
+ if rest != path
+ while rest =~ /^\s*(!?=)\s*/
+ if $1[0] == ?!
+ n = [ :neq, n, [] ]
+ else
+ n = [ :eq, n, [] ]
+ end
+ # $' is still the text after the operator matched above
+ rest = RelationalExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
+ #| AdditiveExpr
+ def RelationalExpr path, parsed
+ n = []
+ rest = AdditiveExpr( path, n )
+ if rest != path
+ while rest =~ /^\s*([<>]=?)\s*/
+ if $1[0] == ?<
+ sym = "lt"
+ else
+ sym = "gt"
+ end
+ sym << "eq" if $1[-1] == ?=
+ n = [ sym.intern, n, [] ]
+ rest = AdditiveExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| AdditiveExpr ('+' | S '-') MultiplicativeExpr
+ #| MultiplicativeExpr
+ # Parses MultiplicativeExpr operands joined by "+" or " -" into nested
+ # [ :plus, left, right ] / [ :minus, left, right ] arrays. The result
+ # replaces +parsed+ when +parsed+ is empty and is appended to it as one
+ # element otherwise. Returns the unparsed remainder.
+ def AdditiveExpr path, parsed
+ n = []
+ rest = MultiplicativeExpr( path, n )
+ if rest != path
+ # Minus is only recognised when preceded by a space
+ while rest =~ /^\s*(\+| -)\s*/
+ if $1[0] == ?+
+ n = [ :plus, n, [] ]
+ else
+ n = [ :minus, n, [] ]
+ end
+ rest = MultiplicativeExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
+ #| UnaryExpr
+ # Parses UnaryExpr operands joined by "*", " div " or " mod " into
+ # nested [ :mult | :div | :mod, left, right ] arrays. The result
+ # replaces +parsed+ when +parsed+ is empty and is appended to it as one
+ # element otherwise. Returns the unparsed remainder.
+ def MultiplicativeExpr path, parsed
+ n = []
+ rest = UnaryExpr( path, n )
+ if rest != path
+ # div and mod are only recognised with a space on both sides
+ while rest =~ /^\s*(\*| div | mod )\s*/
+ if $1[0] == ?*
+ n = [ :mult, n, [] ]
+ elsif $1.include?( "div" )
+ n = [ :div, n, [] ]
+ else
+ n = [ :mod, n, [] ]
+ end
+ rest = UnaryExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace(n)
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| '-' UnaryExpr
+ #| UnionExpr
+ def UnaryExpr path, parsed
+ path =~ /^(\-*)/
+ path = $'
+ if $1 and (($1.size % 2) != 0)
+ mult = -1
+ else
+ mult = 1
+ end
+ parsed << :neg if mult < 0
+
+ n = []
+ path = UnionExpr( path, n )
+ parsed.concat( n )
+ path
+ end
+
+ #| UnionExpr '|' PathExpr
+ #| PathExpr
+ # Parses PathExpr operands joined by "|" into nested
+ # [ :union, left, right ] arrays. The result replaces +parsed+ when
+ # +parsed+ is empty and is appended to it as one element otherwise.
+ # Returns the unparsed remainder.
+ def UnionExpr path, parsed
+ n = []
+ rest = PathExpr( path, n )
+ if rest != path
+ while rest =~ /^\s*(\|)\s*/
+ n = [ :union, n, [] ]
+ rest = PathExpr( $', n[-1] )
+ end
+ end
+ if parsed.size == 0 and n.size != 0
+ parsed.replace( n )
+ elsif n.size > 0
+ parsed << n
+ end
+ rest
+ end
+
+ #| LocationPath
+ #| FilterExpr ('/' | '//') RelativeLocationPath
+ def PathExpr path, parsed
+ path =~ /^\s*/
+ path = $'
+ n = []
+ rest = FilterExpr( path, n )
+ if rest != path
+ if rest and rest[0] == ?/
+ return RelativeLocationPath(rest, n)
+ end
+ end
+ rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w*]/
+ parsed.concat(n)
+ return rest
+ end
+
+ #| FilterExpr Predicate
+ #| PrimaryExpr
+ # Parses a primary expression and any predicates that directly follow
+ # it, appending the tokens to +parsed+. Returns the unparsed remainder.
+ def FilterExpr path, parsed
+ n = []
+ path = PrimaryExpr( path, n )
+ path = Predicate(path, n) if path and path[0] == ?[
+ parsed.concat(n)
+ path
+ end
+
+ #| VARIABLE_REFERENCE
+ #| '(' expr ')'
+ #| LITERAL
+ #| NUMBER
+ #| FunctionCall
+ VARIABLE_REFERENCE = /^\$(#{NAME_STR})/u
+ NUMBER = /^(\d*\.?\d+)/
+ NT = /^comment|text|processing-instruction|node$/
+ def PrimaryExpr path, parsed
+ case path
+ when VARIABLE_REFERENCE
+ varname = $1
+ path = $'
+ parsed << :variable
+ parsed << varname
+ #arry << @variables[ varname ]
+ when /^(\w[-\w]*)(?:\()/
+ fname = $1
+ tmp = $'
+ return path if fname =~ NT
+ path = tmp
+ parsed << :function
+ parsed << fname
+ path = FunctionCall(path, parsed)
+ when NUMBER
+ varname = $1.nil? ? $2 : $1
+ path = $'
+ parsed << :literal
+ parsed << (varname.include?('.') ? varname.to_f : varname.to_i)
+ when LITERAL
+ varname = $1.nil? ? $2 : $1
+ path = $'
+ parsed << :literal
+ parsed << varname
+ when /^\(/ #/
+ path, contents = get_group(path)
+ contents = contents[1..-2]
+ n = []
+ OrExpr( contents, n )
+ parsed.concat(n)
+ end
+ path
+ end
+
+ #| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
+ def FunctionCall rest, parsed
+ path, arguments = parse_args(rest)
+ argset = []
+ for argument in arguments
+ args = []
+ OrExpr( argument, args )
+ argset << args
+ end
+ parsed << argset
+ path
+ end
+
+ # get_group( '[foo]bar' ) -> ['bar', '[foo]']
+ # Splits off the balanced group that starts at the first character of
+ # +string+. The closing character is ")" when the group opens with "(",
+ # and "]" otherwise.
+ # @return [remainder, group] or nil when the group is never closed
+ # NOTE(review): quotes are not tracked, so a bracket inside a string
+ # literal is counted like any other - confirm whether callers rely on it.
+ def get_group string
+ ind = 0
+ depth = 0
+ st = string[0,1]
+ en = (st == "(" ? ")" : "]")
+ begin
+ case string[ind,1]
+ when st
+ depth += 1
+ when en
+ depth -= 1
+ end
+ ind += 1
+ end while depth > 0 and ind < string.length
+ return nil unless depth==0
+ [string[ind..-1], string[0..ind-1]]
+ end
+
+ # Splits the text after a function's opening "(" into its top-level,
+ # comma-separated arguments. Commas and parentheses inside single or
+ # double quotes, and commas inside nested parentheses, do not split.
+ # @return [text after the closing ")", array of stripped argument
+ # strings] or nil when the closing ")" is never found
+ def parse_args( string )
+ arguments = []
+ ind = 0
+ inquot = false
+ inapos = false
+ # The opening "(" has already been consumed by the caller
+ depth = 1
+ begin
+ case string[ind]
+ when ?"
+ inquot = !inquot unless inapos
+ when ?'
+ inapos = !inapos unless inquot
+ else
+ unless inquot or inapos
+ case string[ind]
+ when ?(
+ depth += 1
+ # NOTE(review): depth is at least 1 before the increment, so this
+ # branch looks unreachable - confirm before removing.
+ if depth == 1
+ string = string[1..-1]
+ ind -= 1
+ end
+ when ?)
+ depth -= 1
+ if depth == 0
+ # End of the argument list: keep the last argument, drop the ")"
+ s = string[0,ind].strip
+ arguments << s unless s == ""
+ string = string[ind+1..-1]
+ end
+ when ?,
+ if depth == 1
+ # Top-level comma: cut the argument off and restart the scan
+ s = string[0,ind].strip
+ arguments << s unless s == ""
+ string = string[ind+1..-1]
+ ind = -1
+ end
+ end
+ end
+ end
+ ind += 1
+ end while depth > 0 and ind < string.length
+ return nil unless depth==0
+ [string,arguments]
+ end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/quickpath.rb b/jni/ruby/lib/rexml/quickpath.rb
new file mode 100644
index 0000000..9bec215
--- /dev/null
+++ b/jni/ruby/lib/rexml/quickpath.rb
@@ -0,0 +1,265 @@
+require 'rexml/functions'
+require 'rexml/xmltokens'
+
+module REXML
+ class QuickPath
+ include Functions
+ include XMLTokens
+
+ # A base Hash object to be used when initializing a
+ # default empty namespaces set.
+ EMPTY_HASH = {}
+
+ # Returns the first node matched by +path+ relative to +element+, or nil
+ # when nothing matches.
+ def QuickPath::first element, path, namespaces=EMPTY_HASH
+ match(element, path, namespaces)[0]
+ end
+
+ # Yields every node matched by +path+ relative to +element+.
+ # A nil +path+ is treated as "*".
+ def QuickPath::each element, path, namespaces=EMPTY_HASH, &block
+ path = "*" unless path
+ match(element, path, namespaces).each( &block )
+ end
+
+ # Returns the nodes matched by +path+ relative to +element+.
+ # The shape of the start of +path+ decides which nodes the filtering
+ # begins with (the root, the element itself, or its children).
+ # Also installs +namespaces+ as the Functions namespace context.
+ # Raises a RuntimeError when +path+ is nil.
+ def QuickPath::match element, path, namespaces=EMPTY_HASH
+ raise "nil is not a valid xpath" unless path
+ results = nil
+ Functions::namespace_context = namespaces
+ case path
+ when /^\/([^\/]|$)/u
+ # match on root
+ path = path[1..-1]
+ # A bare "/" selects the parent of the root element
+ return [element.root.parent] if path == ''
+ results = filter([element.root], path)
+ when /^[-\w]*::/u
+ # Explicit axis: start from the element itself
+ results = filter([element], path)
+ when /^\*/u
+ results = filter(element.to_a, path)
+ when /^[\[!\w:]/u
+ # match on child
+ children = element.to_a
+ results = filter(children, path)
+ else
+ results = filter([element], path)
+ end
+ return results
+ end
+
+ # Given an array of nodes it filters the array based on the path. The
+ # result is that when this method returns, the array will contain elements
+ # which match the path
+ # Dispatches on the leading construct of +path+ and recurses on what
+ # follows it. Returns +elements+ unchanged for a nil or empty path or an
+ # empty array, and [] when no pattern applies.
+ # Note that the element-name branch removes non-matching entries from
+ # the +elements+ array it was given (delete_if).
+ def QuickPath::filter elements, path
+ return elements if path.nil? or path == '' or elements.size == 0
+ case path
+ when /^\/\//u # Descendant
+ return axe( elements, "descendant-or-self", $' )
+ when /^\/?\b(\w[-\w]*)\b::/u # Axe
+ return axe( elements, $1, $' )
+ when /^\/(?=\b([:!\w][-\.\w]*:)?[-!\*\.\w]*\b([^:(]|$)|\*)/u # Child
+ rest = $'
+ results = []
+ elements.each do |element|
+ results |= filter( element.to_a, rest )
+ end
+ return results
+ when /^\/?(\w[-\w]*)\(/u # / Function
+ return function( elements, $1, $' )
+ when Namespace::NAMESPLIT # Element name
+ name = $2
+ ns = $1
+ rest = $'
+ # Keep only Elements whose expanded name matches, or whose local name
+ # matches and whose namespace is the one registered for the prefix
+ elements.delete_if do |element|
+ !(element.kind_of? Element and
+ (element.expanded_name == name or
+ (element.name == name and
+ element.namespace == Functions.namespace_context[ns])))
+ end
+ return filter( elements, rest )
+ when /^\/\[/u
+ # "/[...]": apply the predicate to the children of each element
+ matches = []
+ elements.each do |element|
+ matches |= predicate( element.to_a, path[1..-1] ) if element.kind_of? Element
+ end
+ return matches
+ when /^\[/u # Predicate
+ return predicate( elements, path )
+ when /^\/?\.\.\./u # Ancestor
+ return axe( elements, "ancestor", $' )
+ when /^\/?\.\./u # Parent
+ return filter( elements.collect{|e|e.parent}, $' )
+ when /^\/?\./u # Self
+ return filter( elements, $' )
+ when /^\*/u # Any
+ results = []
+ elements.each do |element|
+ results |= filter( [element], $' ) if element.kind_of? Element
+ #if element.kind_of? Element
+ # children = element.to_a
+ # children.delete_if { |child| !child.kind_of?(Element) }
+ # results |= filter( children, $' )
+ #end
+ end
+ return results
+ end
+ return []
+ end
+
+ # Applies the axis named +axe_name+ to +elements+ and filters the nodes
+ # it reaches with +rest+. Returns the matches without duplicates; an
+ # axis name that is not handled below yields no matches of its own.
+ def QuickPath::axe( elements, axe_name, rest )
+ matches = []
+ # "...-or-self" axes start with the context nodes themselves
+ matches = filter( elements.dup, rest ) if axe_name =~ /-or-self$/u
+ case axe_name
+ when /^descendant/u
+ # Recurse into the children of each element
+ elements.each do |element|
+ matches |= filter( element.to_a, "descendant-or-self::#{rest}" ) if element.kind_of? Element
+ end
+ when /^ancestor/u
+ # Collect every parent up to the top, then filter them
+ elements.each do |element|
+ while element.parent
+ matches << element.parent
+ element = element.parent
+ end
+ end
+ matches = filter( matches, rest )
+ when "self"
+ matches = filter( elements, rest )
+ when "child"
+ elements.each do |element|
+ matches |= filter( element.to_a, rest ) if element.kind_of? Element
+ end
+ when "attribute"
+ # Here +rest+ is used directly as the attribute name
+ elements.each do |element|
+ matches << element.attributes[ rest ] if element.kind_of? Element
+ end
+ when "parent"
+ matches = filter(elements.collect{|element| element.parent}.uniq, rest)
+ when "following-sibling"
+ # Only the next sibling of each element is considered
+ matches = filter(elements.collect{|element| element.next_sibling}.uniq,
+ rest)
+ when "previous-sibling"
+ # NOTE(review): XPath names this axis "preceding-sibling"; confirm
+ # whether the spelling here is intentional.
+ matches = filter(elements.collect{|element|
+ element.previous_sibling}.uniq, rest )
+ end
+ return matches.uniq
+ end
+
+ OPERAND_ = '((?=(?:(?!and|or).)*[^\s<>=])[^\s<>=]+)'
+ # A predicate filters a node-set with respect to an axis to produce a
+ # new node-set. For each node in the node-set to be filtered, the
+ # PredicateExpr is evaluated with that node as the context node, with
+ # the number of nodes in the node-set as the context size, and with the
+ # proximity position of the node in the node-set with respect to the
+ # axis as the context position; if PredicateExpr evaluates to true for
+ # that node, the node is included in the new node-set; otherwise, it is
+ # not included.
+ #
+ # A PredicateExpr is evaluated by evaluating the Expr and converting
+ # the result to a boolean. If the result is a number, the result will
+ # be converted to true if the number is equal to the context position
+ # and will be converted to false otherwise; if the result is not a
+ # number, then the result will be converted as if by a call to the
+ # boolean function. Thus a location path para[3] is equivalent to
+ # para[position()=3].
+ def QuickPath::predicate( elements, path )
+ ind = 1
+ bcount = 1
+ while bcount > 0
+ bcount += 1 if path[ind] == ?[
+ bcount -= 1 if path[ind] == ?]
+ ind += 1
+ end
+ ind -= 1
+ predicate = path[1..ind-1]
+ rest = path[ind+1..-1]
+
+ # have to change 'a [=<>] b [=<>] c' into 'a [=<>] b and b [=<>] c'
+ #
+ predicate.gsub!(
+ /#{OPERAND_}\s*([<>=])\s*#{OPERAND_}\s*([<>=])\s*#{OPERAND_}/u,
+ '\1 \2 \3 and \3 \4 \5' )
+ # Let's do some Ruby trickery to avoid some work:
+ predicate.gsub!( /&/u, "&&" )
+ predicate.gsub!( /=/u, "==" )
+ predicate.gsub!( /@(\w[-\w.]*)/u, 'attribute("\1")' )
+ predicate.gsub!( /\bmod\b/u, "%" )
+ predicate.gsub!( /\b(\w[-\w.]*\()/u ) {
+ fname = $1
+ fname.gsub( /-/u, "_" )
+ }
+
+ Functions.pair = [ 0, elements.size ]
+ results = []
+ elements.each do |element|
+ Functions.pair[0] += 1
+ Functions.node = element
+ res = eval( predicate )
+ case res
+ when true
+ results << element
+ when Fixnum
+ results << element if Functions.pair[0] == res
+ when String
+ results << element
+ end
+ end
+ return filter( results, rest )
+ end
+
+ # Returns attribute +name+ of the current Functions node when that node
+ # is an Element, and nil otherwise.
+ def QuickPath::attribute( name )
+ return Functions.node.attributes[name] if Functions.node.kind_of? Element
+ end
+
+ # Returns the name of the current Functions node when that node is an
+ # Element, and nil otherwise.
+ def QuickPath::name()
+ return Functions.node.name if Functions.node.kind_of? Element
+ end
+
+ def QuickPath::method_missing( id, *args )
+ begin
+ Functions.send( id.id2name, *args )
+ rescue Exception
+ raise "METHOD: #{id.id2name}(#{args.join ', '})\n#{$!.message}"
+ end
+ end
+
+ def QuickPath::function( elements, fname, rest )
+ args = parse_args( elements, rest )
+ Functions.pair = [0, elements.size]
+ results = []
+ elements.each do |element|
+ Functions.pair[0] += 1
+ Functions.node = element
+ res = Functions.send( fname, *args )
+ case res
+ when true
+ results << element
+ when Fixnum
+ results << element if Functions.pair[0] == res
+ end
+ end
+ return results
+ end
+
+ # Scans +string+ one character at a time, collecting function arguments
+ # until a ")" is reached. +string+ is consumed in place (sub!).
+ # Returns the arguments array at the closing ")", or "" when the string
+ # runs out first.
+ # NOTE(review): no "evaluate" method is defined in the visible part of
+ # this class, and buffer is not cleared after a comma - confirm that the
+ # comma branch works as intended.
+ def QuickPath::parse_args( element, string )
+ # /.*?(?:\)|,)/
+ arguments = []
+ buffer = ""
+ while string and string != ""
+ c = string[0]
+ string.sub!(/^./u, "")
+ case c
+ when ?,
+ # if depth = 1, then we start a new argument
+ arguments << evaluate( buffer )
+ #arguments << evaluate( string[0..count] )
+ when ?(
+ # start a new method call
+ function( element, buffer, string )
+ buffer = ""
+ when ?)
+ # close the method call and return arguments
+ return arguments
+ else
+ buffer << c
+ end
+ end
+ ""
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/rexml.rb b/jni/ruby/lib/rexml/rexml.rb
new file mode 100644
index 0000000..2c0defc
--- /dev/null
+++ b/jni/ruby/lib/rexml/rexml.rb
@@ -0,0 +1,31 @@
+# -*- encoding: utf-8 -*-
+# REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
+#
+# REXML is a _pure_ Ruby, XML 1.0 conforming,
+# non-validating[http://www.w3.org/TR/2004/REC-xml-20040204/#sec-conformance]
+# toolkit with an intuitive API. REXML passes 100% of the non-validating Oasis
+# tests[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml],
+# and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
+# includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
+# Ruby 1.8, REXML is included in the standard Ruby distribution.
+#
+# Main page:: http://www.germane-software.com/software/rexml
+# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
+# Date:: 2008/019
+# Version:: 3.1.7.3
+#
+# This API documentation can be downloaded from the REXML home page, or can
+# be accessed online[http://www.germane-software.com/software/rexml_doc]
+#
+# A tutorial is available in the REXML distribution in docs/tutorial.html,
+# or can be accessed
+# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
+module REXML
+ # Library metadata.
+ COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
+ DATE = "2008/019"
+ VERSION = "3.1.7.3"
+ # %w with "$" delimiters splits the keyword into words; index 1 is the
+ # revision number, with '' as the fallback when it is absent.
+ REVISION = %w$Revision: 39528 $[1] || ''
+
+ # Mixed-case aliases for the two constants above
+ Copyright = COPYRIGHT
+ Version = VERSION
+end
diff --git a/jni/ruby/lib/rexml/sax2listener.rb b/jni/ruby/lib/rexml/sax2listener.rb
new file mode 100644
index 0000000..9f276eb
--- /dev/null
+++ b/jni/ruby/lib/rexml/sax2listener.rb
@@ -0,0 +1,97 @@
+module REXML
+ # A template for SAX2 parser listeners. Every callback is an empty
+ # method, so a listener only needs to override the ones it cares about.
+ # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
+ # processed; REXML doesn't yet handle doctype entity declarations, so you
+ # have to parse them out yourself.
+ # === Missing methods from SAX2
+ # ignorable_whitespace
+ # === Methods extending SAX2
+ # +WARNING+
+ # These methods are certainly going to change, until DTDs are fully
+ # supported. Be aware of this.
+ # start_document
+ # end_document
+ # doctype
+ # elementdecl
+ # attlistdecl
+ # entitydecl
+ # notationdecl
+ # cdata
+ # xmldecl
+ # comment
+ module SAX2Listener
+ # Callback for the start of the document.
+ def start_document
+ end
+ # Callback for the end of the document.
+ def end_document
+ end
+ # Callback for the start of a namespace prefix mapping.
+ # @p prefix the namespace prefix
+ # @p uri the URI bound to the prefix
+ def start_prefix_mapping prefix, uri
+ end
+ # Callback for the end of a namespace prefix mapping.
+ # @p prefix the namespace prefix
+ def end_prefix_mapping prefix
+ end
+ # Callback for an element start tag.
+ # @p uri the namespace URI of the element
+ # @p localname the element name without prefix
+ # @p qname the element name as written
+ # @p attributes the attributes of the element
+ def start_element uri, localname, qname, attributes
+ end
+ # Callback for an element end tag; parameters as for start_element.
+ def end_element uri, localname, qname
+ end
+ # Callback for character data.
+ # @p text the text content
+ def characters text
+ end
+ # Callback for a processing instruction.
+ # @p target the PI target
+ # @p data the PI content
+ def processing_instruction target, data
+ end
+ # Handles a doctype declaration. Any attributes of the doctype which are
+ # not supplied will be nil. # EG, <!DOCTYPE me PUBLIC "foo" "bar">
+ # @p name the name of the doctype; EG, "me"
+ # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
+ # @p long_name the supplied long name, or nil. EG, "foo"
+ # @p uri the uri of the doctype, or nil. EG, "bar"
+ def doctype name, pub_sys, long_name, uri
+ end
+ # If a doctype includes an ATTLIST declaration, it will cause this
+ # method to be called. The content is the declaration itself, unparsed.
+ # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
+ # attr CDATA #REQUIRED". This is the same for all of the .*decl
+ # methods.
+ # NOTE(review): the signature takes three arguments (element, pairs,
+ # contents) rather than one unparsed string - confirm what the parser
+ # passes for each of them.
+ def attlistdecl(element, pairs, contents)
+ end
+ # <!ELEMENT ...>
+ def elementdecl content
+ end
+ # <!ENTITY ...>
+ # The argument passed to this method is an array of the entity
+ # declaration. It can be in a number of formats, but in general it
+ # returns (example, result):
+ # <!ENTITY % YN '"Yes"'>
+ # ["%", "YN", "\"Yes\""]
+ # <!ENTITY % YN 'Yes'>
+ # ["%", "YN", "Yes"]
+ # <!ENTITY WhatHeSaid "He said %YN;">
+ # ["WhatHeSaid", "He said %YN;"]
+ # <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
+ # ["open-hatch", "SYSTEM", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
+ # <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
+ # ["open-hatch", "PUBLIC", "-//Textuality//TEXT Standard open-hatch boilerplate//EN", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
+ # <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
+ # ["hatch-pic", "SYSTEM", "../grafix/OpenHatch.gif", "NDATA", "gif"]
+ def entitydecl declaration
+ end
+ # <!NOTATION ...>
+ def notationdecl name, public_or_system, public_id, system_id
+ end
+ # Called when <![CDATA[ ... ]]> is encountered in a document.
+ # @p content "..."
+ def cdata content
+ end
+ # Called when an XML PI is encountered in the document.
+ # EG: <?xml version="1.0" encoding="utf"?>
+ # @p version the version attribute value. EG, "1.0"
+ # @p encoding the encoding attribute value, or nil. EG, "utf"
+ # @p standalone the standalone attribute value, or nil. EG, nil
+ # (Earlier notes listed a fourth "spaced" parameter; this signature
+ # takes only the three above.)
+ def xmldecl version, encoding, standalone
+ end
+ # Called when a comment is encountered.
+ # @p comment The content of the comment
+ def comment comment
+ end
+ # Progress callback.
+ # @p position presumably the parser's current position in the source -
+ # TODO confirm against the SAX2 parser
+ def progress position
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/security.rb b/jni/ruby/lib/rexml/security.rb
new file mode 100644
index 0000000..593b652
--- /dev/null
+++ b/jni/ruby/lib/rexml/security.rb
@@ -0,0 +1,27 @@
+module REXML
+ # Holds the limits applied to entity expansion.
+ module Security
+ @@entity_expansion_limit = 10_000
+
+ # Set the entity expansion limit. By default the limit is set to 10000.
+ def self.entity_expansion_limit=( val )
+ @@entity_expansion_limit = val
+ end
+
+ # Get the entity expansion limit. By default the limit is set to 10000.
+ def self.entity_expansion_limit
+ return @@entity_expansion_limit
+ end
+
+ @@entity_expansion_text_limit = 10_240
+
+ # Set the entity expansion text limit. By default the limit is set to 10240.
+ def self.entity_expansion_text_limit=( val )
+ @@entity_expansion_text_limit = val
+ end
+
+ # Get the entity expansion text limit. By default the limit is set to 10240.
+ def self.entity_expansion_text_limit
+ return @@entity_expansion_text_limit
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/source.rb b/jni/ruby/lib/rexml/source.rb
new file mode 100644
index 0000000..cfafdbd
--- /dev/null
+++ b/jni/ruby/lib/rexml/source.rb
@@ -0,0 +1,296 @@
+# coding: US-ASCII
+require 'rexml/encoding'
+
+module REXML
+ # Generates Source-s. USE THIS CLASS.
+ class SourceFactory
+ # Generates a Source object
+ # @param arg Either a String, or an IO
+ # @return a Source, or nil if a bad argument was given
+ def SourceFactory::create_from(arg)
+ if arg.respond_to? :read and
+ arg.respond_to? :readline and
+ arg.respond_to? :nil? and
+ arg.respond_to? :eof?
+ IOSource.new(arg)
+ elsif arg.respond_to? :to_str
+ require 'stringio'
+ IOSource.new(StringIO.new(arg))
+ elsif arg.kind_of? Source
+ arg
+ else
+ raise "#{arg.class} is not a valid input stream. It must walk \n"+
+ "like either a String, an IO, or a Source."
+ end
+ end
+ end
+
+ # A Source can be searched for patterns, and wraps buffers and other
+ # objects and provides consumption of text
+ class Source
+ include Encoding
+ # The current buffer (what we're going to read next)
+ attr_reader :buffer
+ # The line number of the last consumed text
+ attr_reader :line
+ attr_reader :encoding
+
+ # Constructor
+ # @param arg must be a String, and should be a valid XML document
+ # @param encoding if non-null, sets the encoding of the source to this
+ # value, overriding all encoding detection
+ def initialize(arg, encoding=nil)
+ @orig = @buffer = arg
+ if encoding
+ self.encoding = encoding
+ else
+ detect_encoding
+ end
+ @line = 0
+ end
+
+
+ # Inherited from Encoding
+ # Overridden to support optimized en/decoding
+ def encoding=(enc)
+ return unless super
+ encoding_updated
+ end
+
+ # Scans the source for a given pattern. Note, that this is not your
+ # usual scan() method. For one thing, the pattern argument has some
+ # requirements; for another, the source can be consumed. You can easily
+ # confuse this method. Originally, the patterns were easier
+ # to construct and this method more robust, because this method
+ # generated search regexps on the fly; however, this was
+ # computationally expensive and slowed down the entire REXML package
+ # considerably, since this is by far the most commonly called method.
+ # @param pattern must be a Regexp, and must be in the form of
+ # /^\s*(#{your pattern, with no groups})(.*)/. The first group
+ # will be returned; the second group is used if the consume flag is
+ # set.
+ # @param consume if true, the pattern returned will be consumed, leaving
+ # everything after it in the Source.
+ # @return the pattern, if found, or nil if the Source is empty or the
+ # pattern is not found.
+ def scan(pattern, cons=false)
+ return nil if @buffer.nil?
+ rv = @buffer.scan(pattern)
+ @buffer = $' if cons and rv.size>0
+ rv
+ end
+
+ def read # intentionally empty in Source; IOSource overrides it to append more input to the buffer
+ end
+
+ def consume( pattern )
+ @buffer = $' if pattern.match( @buffer )
+ end
+
+ def match_to( char, pattern )
+ return pattern.match(@buffer)
+ end
+
+ def match_to_consume( char, pattern )
+ md = pattern.match(@buffer)
+ @buffer = $'
+ return md
+ end
+
+ def match(pattern, cons=false)
+ md = pattern.match(@buffer)
+ @buffer = $' if cons and md
+ return md
+ end
+
+ # @return true if the Source is exhausted
+ def empty?
+ @buffer == "" # only an empty string counts; a nil buffer is not reported as empty
+ end
+
+ def position
+ @orig.index( @buffer ) # index of the first occurrence of the remaining buffer text inside the original input
+ end
+
+ # @return the current line in the source
+ def current_line
+ lines = @orig.split # split with no argument breaks on whitespace, so these are whitespace-separated pieces
+ res = lines.grep @buffer[0..30] # pieces that equal (===) the first 31 characters of the buffer
+ res = res[-1] if res.kind_of? Array # grep returns an Array; keep its last element (nil when empty)
+ lines.index( res ) if res
+ end
+
+ private
+ def detect_encoding
+ buffer_encoding = @buffer.encoding # remembered so the ensure clause can restore the original tag
+ detected_encoding = "UTF-8" # result when no byte order mark is found
+ begin
+ @buffer.force_encoding("ASCII-8BIT") # compare raw bytes against the byte order marks
+ if @buffer[0, 2] == "\xfe\xff"
+ @buffer[0, 2] = "" # the mark itself is removed from the buffer
+ detected_encoding = "UTF-16BE"
+ elsif @buffer[0, 2] == "\xff\xfe"
+ @buffer[0, 2] = ""
+ detected_encoding = "UTF-16LE"
+ elsif @buffer[0, 3] == "\xef\xbb\xbf"
+ @buffer[0, 3] = ""
+ detected_encoding = "UTF-8"
+ end
+ ensure
+ @buffer.force_encoding(buffer_encoding)
+ end
+ self.encoding = detected_encoding # goes through encoding=, which may trigger encoding_updated
+ end
+
+ def encoding_updated
+ if @encoding != 'UTF-8'
+ @buffer = decode(@buffer)
+ @to_utf = true
+ else
+ @to_utf = false
+ @buffer.force_encoding ::Encoding::UTF_8
+ end
+ end
+ end
+
+ # A Source that wraps an IO. See the Source class for method
+ # documentation
+ class IOSource < Source
+ #attr_reader :block_size
+
+ # block_size has been deprecated
+ def initialize(arg, block_size=500, encoding=nil) # +block_size+ is accepted but never read in this method
+ @er_source = @source = arg
+ @to_utf = false
+ @pending_buffer = nil
+
+ if encoding
+ super("", encoding)
+ else
+ super(@source.read(3) || "") # the first three bytes (enough for a byte order mark), or "" when read returns nil
+ end
+
+ if !@to_utf and
+ @buffer.respond_to?(:force_encoding) and
+ @source.respond_to?(:external_encoding) and
+ @source.external_encoding != ::Encoding::UTF_8
+ @force_utf8 = true # readline then re-tags every chunk it returns as UTF-8
+ else
+ @force_utf8 = false
+ end
+ end
+
+ def scan(pattern, cons=false)
+ rv = super
+ # You'll notice that this next section is very similar to the same
+ # section in match(), but just a liiittle different. This is
+ # because it is a touch faster to do it this way with scan()
+ # than the way match() does it; enough faster to warrent duplicating
+ # some code
+ if rv.size == 0
+ until @buffer =~ pattern or @source.nil?
+ begin
+ @buffer << readline
+ rescue Iconv::IllegalSequence
+ raise
+ rescue
+ @source = nil
+ end
+ end
+ rv = super
+ end
+ rv.taint
+ rv
+ end
+
+ def read
+ begin
+ @buffer << readline
+ rescue Exception, NameError
+ @source = nil
+ end
+ end
+
+ def consume( pattern )
+ match( pattern, true ) # same as match with the consume flag set
+ end
+
+ def match( pattern, cons=false )
+ rv = pattern.match(@buffer)
+ @buffer = $' if cons and rv
+ while !rv and @source # keep pulling input until the pattern matches or the source is gone
+ begin
+ @buffer << readline
+ rv = pattern.match(@buffer)
+ @buffer = $' if cons and rv
+ rescue # any StandardError raised while reading or matching marks the source as exhausted
+ @source = nil
+ end
+ end
+ rv.taint
+ rv
+ end
+
+ def empty?
+ super and ( @source.nil? || @source.eof? ) # buffer is "" and there is no source left or it is at end of file
+ end
+
+ def position
+ @er_source.pos rescue 0 # falls back to 0 when asking the source for its position raises a StandardError
+ end
+
+ # @return the current line in the source
+ def current_line
+ begin
+ pos = @er_source.pos # The byte position in the source
+ lineno = @er_source.lineno # The XML < position in the source
+ @er_source.rewind
+ line = 0 # The \r\n position in the source
+ begin
+ while @er_source.pos < pos
+ @er_source.readline
+ line += 1
+ end
+ rescue # errors while re-reading are ignored; +line+ keeps the count reached so far
+ end
+ rescue IOError
+ pos = -1
+ line = -1 # +lineno+ is not reset here, so it keeps whatever it held when the error was raised
+ end
+ [pos, lineno, line]
+ end
+
+ private
+ def readline
+ str = @source.readline(@line_break) # @line_break is the encoded ">" set up in encoding_updated
+ if @pending_buffer # bytes set aside by encoding_updated are prepended to the first chunk read
+ if str.nil?
+ str = @pending_buffer
+ else
+ str = @pending_buffer + str
+ end
+ @pending_buffer = nil
+ end
+ return nil if str.nil?
+
+ if @to_utf
+ decode(str)
+ else
+ str.force_encoding(::Encoding::UTF_8) if @force_utf8
+ str
+ end
+ end
+
+ def encoding_updated
+ case @encoding
+ when "UTF-16BE", "UTF-16LE"
+ @source.binmode
+ @source.set_encoding(@encoding, @encoding)
+ end
+ @line_break = encode(">") # separator later handed to @source.readline
+ @pending_buffer, @buffer = @buffer, "" # set the already-read bytes aside; readline prepends them to the next chunk
+ @pending_buffer.force_encoding(@encoding)
+ super
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/streamlistener.rb b/jni/ruby/lib/rexml/streamlistener.rb
new file mode 100644
index 0000000..3a0528c
--- /dev/null
+++ b/jni/ruby/lib/rexml/streamlistener.rb
@@ -0,0 +1,92 @@
+module REXML
+ # A template for stream parser listeners.
+ # Note that the declarations (attlistdecl, elementdecl, etc) are trivially
+ # processed; REXML doesn't yet handle doctype entity declarations, so you
+ # have to parse them out yourself.
+ module StreamListener
+ # Called when a tag is encountered.
+ # @p name the tag name
+ # @p attrs an array of arrays of attribute/value pairs, suitable for
+ # use with assoc or rassoc. IE, <tag attr1="value1" attr2="value2">
+ # will result in
+ # tag_start( "tag", [["attr1","value1"],["attr2","value2"]])
+ def tag_start name, attrs
+ end
+ # Called when the end tag is reached. In the case of <tag/>, tag_end
+ # will be called immediately after tag_start
+ # @p name the name of the tag
+ def tag_end name
+ end
+ # Called when text is encountered in the document
+ # @p text the text content.
+ def text text
+ end
+ # Called when an instruction is encountered. EG: <?xsl sheet='foo'?>
+ # @p name the instruction name; in the example, "xsl"
+ # @p instruction the rest of the instruction. In the example,
+ # "sheet='foo'"
+ def instruction name, instruction
+ end
+ # Called when a comment is encountered.
+ # @p comment The content of the comment
+ def comment comment
+ end
+ # Handles a doctype declaration. Any attributes of the doctype which are
+ # not supplied will be nil. EG, <!DOCTYPE me PUBLIC "foo" "bar">
+ # @p name the name of the doctype; EG, "me"
+ # @p pub_sys "PUBLIC", "SYSTEM", or nil. EG, "PUBLIC"
+ # @p long_name the supplied long name, or nil. EG, "foo"
+ # @p uri the uri of the doctype, or nil. EG, "bar"
+ def doctype name, pub_sys, long_name, uri
+ end
+ # Called when the doctype is done
+ def doctype_end
+ end
+ # If a doctype includes an ATTLIST declaration, it will cause this
+ # method to be called. The content is the declaration itself, unparsed.
+ # EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this method as "el
+ # attr CDATA #REQUIRED". This is the same for all of the .*decl
+ # methods. NOTE(review): the signature below takes element_name, attributes and raw_content; presumably raw_content carries that unparsed text - confirm against the caller.
+ def attlistdecl element_name, attributes, raw_content
+ end
+ # <!ELEMENT ...>
+ def elementdecl content
+ end
+ # <!ENTITY ...>
+ # The argument passed to this method is an array of the entity
+ # declaration. It can be in a number of formats, but in general it
+ # returns (example, result):
+ # <!ENTITY % YN '"Yes"'>
+ # ["YN", "\"Yes\"", "%"]
+ # <!ENTITY % YN 'Yes'>
+ # ["YN", "Yes", "%"]
+ # <!ENTITY WhatHeSaid "He said %YN;">
+ # ["WhatHeSaid", "He said %YN;"]
+ # <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
+ # ["open-hatch", "SYSTEM", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
+ # <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
+ # ["open-hatch", "PUBLIC", "-//Textuality//TEXT Standard open-hatch boilerplate//EN", "http://www.textuality.com/boilerplate/OpenHatch.xml"]
+ # <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
+ # ["hatch-pic", "SYSTEM", "../grafix/OpenHatch.gif", "gif"]
+ def entitydecl content
+ end
+ # <!NOTATION ...>
+ def notationdecl content
+ end
+ # Called when %foo; is encountered in a doctype declaration.
+ # @p content "foo"
+ def entity content
+ end
+ # Called when <![CDATA[ ... ]]> is encountered in a document.
+ # @p content "..."
+ def cdata content
+ end
+ # Called when an XML PI is encountered in the document.
+ # EG: <?xml version="1.0" encoding="utf"?>
+ # @p version the version attribute value. EG, "1.0"
+ # @p encoding the encoding attribute value, or nil. EG, "utf"
+ # @p standalone the standalone attribute value, or nil. EG, nil
+ def xmldecl version, encoding, standalone
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/syncenumerator.rb b/jni/ruby/lib/rexml/syncenumerator.rb
new file mode 100644
index 0000000..11609bd
--- /dev/null
+++ b/jni/ruby/lib/rexml/syncenumerator.rb
@@ -0,0 +1,32 @@
+module REXML
+ class SyncEnumerator
+ include Enumerable
+
+ # Creates a new SyncEnumerator which enumerates rows of given
+ # Enumerable objects.
+ def initialize(*enums)
+ @gens = enums
+ @length = @gens.collect {|x| x.size }.max
+ end
+
+ # Returns the number of enumerated Enumerable objects, i.e. the size
+ # of each row.
+ def size
+ @gens.size
+ end
+
+ # Returns the number of enumerated Enumerable objects, i.e. the size
+ # of each row.
+ def length
+ @gens.length
+ end
+
+ # Enumerates rows of the Enumerable objects.
+ def each
+ @length.times {|i|
+ yield @gens.collect {|x| x[i]}
+ }
+ self
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/text.rb b/jni/ruby/lib/rexml/text.rb
new file mode 100644
index 0000000..d3242ee
--- /dev/null
+++ b/jni/ruby/lib/rexml/text.rb
@@ -0,0 +1,425 @@
+require 'rexml/security'
+require 'rexml/entity'
+require 'rexml/doctype'
+require 'rexml/child'
+require 'rexml/doctype'
+require 'rexml/parseexception'
+
+module REXML
+ # Represents text nodes in an XML document
+ class Text < Child
+ include Comparable
+ # The order in which the substitutions occur
+ SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]
+ SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;']
+ # Characters which are substituted in written strings
+ SLAICEPS = [ '<', '>', '"', "'", '&' ]
+ SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ]
+
+ # If +raw+ is true, then REXML leaves the value alone
+ attr_accessor :raw
+
+ NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
+ NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
+ VALID_CHAR = [
+ 0x9, 0xA, 0xD,
+ (0x20..0xD7FF),
+ (0xE000..0xFFFD),
+ (0x10000..0x10FFFF)
+ ]
+
+ if String.method_defined? :encode
+ VALID_XML_CHARS = Regexp.new('^['+
+ VALID_CHAR.map { |item|
+ case item
+ when Fixnum
+ [item].pack('U').force_encoding('utf-8')
+ when Range
+ [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
+ end
+ }.join +
+ ']*$')
+ else
+ VALID_XML_CHARS = /^(
+ [\x09\x0A\x0D\x20-\x7E] # ASCII
+ | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
+ | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
+ | [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
+ | \xEF[\x80-\xBE]{2} #
+ | \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
+ | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
+ | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
+ | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
+ | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
+ )*$/nx;
+ end
+
+ # Constructor
+ # +arg+ if a String, the content is set to the String. If a Text,
+ # the object is shallowly cloned.
+ #
+ # +respect_whitespace+ (boolean, false) if true, whitespace is
+ # respected
+ #
+ # +parent+ (nil) if this is a Parent object, the parent
+ # will be set to this.
+ #
+ # +raw+ (nil) This argument can be given three values.
+ # If true, then the value of used to construct this object is expected to
+ # contain no unescaped XML markup, and REXML will not change the text. If
+ # this value is false, the string may contain any characters, and REXML will
+ # escape any and all defined entities whose values are contained in the
+ # text. If this value is nil (the default), then the raw value of the
+ # parent will be used as the raw value for this node. If there is no raw
+ # value for the parent, and no value is supplied, the default is false.
+ # Use this field if you have entities defined for some text, and you don't
+ # want REXML to escape that text in output.
+ # Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
+ # Text.new( "&lt;&amp;", false, nil, false ) #-> "&amp;lt;&amp;amp;"
+ # Text.new( "<&", false, nil, true ) #-> Parse exception
+ # Text.new( "&lt;&amp;", false, nil, true ) #-> "&lt;&amp;"
+ # # Assume that the entity "s" is defined to be "sean"
+ # # and that the entity "r" is defined to be "russell"
+ # Text.new( "sean russell" ) #-> "&s; &r;"
+ # Text.new( "sean russell", false, nil, true ) #-> "sean russell"
+ #
+ # +entity_filter+ (nil) This can be an array of entities to match in the
+ # supplied text. This argument is only useful if +raw+ is set to false.
+ # Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"
+ # Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
+ # In the last example, the +entity_filter+ argument is ignored.
+ #
+ # +illegal+ INTERNAL USE ONLY
+ def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
+ entity_filter=nil, illegal=NEEDS_A_SECOND_CHECK )
+
+ @raw = false
+ @parent = nil
+
+ if parent
+ super( parent )
+ @raw = parent.raw
+ end
+
+ @raw = raw unless raw.nil?
+ @entity_filter = entity_filter
+ clear_cache
+
+ if arg.kind_of? String
+ @string = arg.dup
+ @string.squeeze!(" \n\t") unless respect_whitespace
+ elsif arg.kind_of? Text
+ @string = arg.to_s
+ @raw = arg.raw
+ elsif
+ raise "Illegal argument of type #{arg.type} for Text constructor (#{arg})"
+ end
+
+ @string.gsub!( /\r\n?/, "\n" )
+
+ Text.check(@string, illegal, doctype) if @raw
+ end
+
+ def parent= parent
+ super(parent)
+ Text.check(@string, NEEDS_A_SECOND_CHECK, doctype) if @raw and @parent # raw text is re-validated once it has a parent
+ end
+
+ # check for illegal characters
+ def Text.check string, pattern, doctype # +doctype+ is only referenced by the commented-out code near the end
+
+ # illegal anywhere
+ if string !~ VALID_XML_CHARS
+ if String.method_defined? :encode
+ string.chars.each do |c|
+ case c.ord
+ when *VALID_CHAR
+ else
+ raise "Illegal character #{c.inspect} in raw string \"#{string}\""
+ end
+ end
+ else
+ string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
+ case c.unpack('U') # NOTE(review): unpack returns an Array, which no VALID_CHAR entry matches with === - confirm
+ when *VALID_CHAR
+ else
+ raise "Illegal character #{c.inspect} in raw string \"#{string}\""
+ end
+ end
+ end
+ end
+
+ # context sensitive
+ string.scan(pattern) do
+ if $1[-1] != ?; # the match does not end in ';'
+ raise "Illegal character '#{$1}' in raw string \"#{string}\""
+ elsif $1[0] == ?&
+ if $5 and $5[0] == ?# # numeric character reference: its code point must be in VALID_CHAR
+ case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
+ when *VALID_CHAR
+ else
+ raise "Illegal character '#{$1}' in raw string \"#{string}\""
+ end
+ # FIXME: below can't work but this needs API change.
+ # elsif @parent and $3 and !SUBSTITUTES.include?($1)
+ # if !doctype or !doctype.entities.has_key?($3)
+ # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
+ # end
+ end
+ end
+ end
+ end
+
+ def node_type
+ :text # constant symbol for every Text instance
+ end
+
+ def empty?
+ @string.size==0
+ end
+
+
+ def clone
+ return Text.new(self)
+ end
+
+
+ # Appends text to this text node. The text is appended in the +raw+ mode
+ # of this text node.
+ #
+ # +returns+ the text itself to enable method chain like
+ # 'text << "XXX" << "YYY"'.
+ def <<( to_append )
+ @string << to_append.gsub( /\r\n?/, "\n" ) # line endings are normalised to "\n" before appending
+ clear_cache # cached normalized/unnormalized forms are now stale
+ self
+ end
+
+
+ # +other+ a String or a Text
+ # +returns+ the result of (to_s <=> arg.to_s)
+ def <=>( other )
+ to_s() <=> other.to_s # compares the to_s forms of both sides
+ end
+
+ def doctype
+ if @parent
+ doc = @parent.document
+ doc.doctype if doc
+ end
+ end
+
+ REFERENCE = /#{Entity::REFERENCE}/
+ # Returns the string value of this text node. This string is always
+ # escaped, meaning that it is a valid XML text node string, and all
+ # entities that can be escaped, have been inserted. This method respects
+ # the entity filter set in the constructor.
+ #
+ # # Assume that the entity "s" is defined to be "sean", and that the
+ # # entity "r" is defined to be "russell"
+ # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
+ # t.to_s #-> "&lt; &amp; &s; russell"
+ # t = Text.new( "< & &s; russell", false, nil, false )
+ # t.to_s #-> "&lt; &amp; &s; russell"
+ # u = Text.new( "sean russell", false, nil, true )
+ # u.to_s #-> "sean russell"
+ def to_s
+ return @string if @raw
+ return @normalized if @normalized
+
+ @normalized = Text::normalize( @string, doctype, @entity_filter )
+ end
+
+ def inspect
+ @string.inspect # the stored string as-is, without normalization
+ end
+
+ # Returns the string value of this text. This is the text without
+ # entities, as it might be used programmatically, or printed to the
+ # console. This ignores the 'raw' attribute setting, and any
+ # entity_filter.
+ #
+ # # Assume that the entity "s" is defined to be "sean", and that the
+ # # entity "r" is defined to be "russell"
+ # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
+ # t.value #-> "< & sean russell"
+ # t = Text.new( "< & &s; russell", false, nil, false )
+ # t.value #-> "< & sean russell"
+ # u = Text.new( "sean russell", false, nil, true )
+ # u.value #-> "sean russell"
+ def value
+ return @unnormalized if @unnormalized
+ @unnormalized = Text::unnormalize( @string, doctype )
+ end
+
+ # Sets the contents of this text node. This expects the text to be
+ # unnormalized. It returns self.
+ #
+ # e = Element.new( "a" )
+ # e.add_text( "foo" ) # <a>foo</a>
+ # e[0].value = "bar" # <a>bar</a>
+ # e[0].value = "<a>" # <a>&lt;a&gt;</a>
+ def value=( val )
+ @string = val.gsub( /\r\n?/, "\n" ) # line endings are normalised to "\n"
+ clear_cache # cached normalized/unnormalized forms are now stale
+ @raw = false # last expression: a direct call (e.g. via send) returns false, not self
+ end
+
+ def wrap(string, width, addnewline=false)
+ # Recursively wrap string at width.
+ return string if string.length <= width
+ place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
+ if addnewline then
+ return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)
+ else
+ return string[0,place] + "\n" + wrap(string[place+1..-1], width)
+ end
+ end
+
+ def indent_text(string, level=1, style="\t", indentfirstline=true)
+ return string if level < 0
+ new_string = ''
+ string.each_line { |line|
+ indent_string = style * level
+ new_line = (indent_string + line).sub(/[\s]+$/,'')
+ new_string << new_line
+ }
+ new_string.strip! unless indentfirstline
+ return new_string
+ end
+
+ # == DEPRECATED
+ # See REXML::Formatters
+ #
+ def write( writer, indent=-1, transitive=false, ie_hack=false ) # +transitive+ and +ie_hack+ are not used in the body
+ Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
+ formatter = if indent > -1
+ REXML::Formatters::Pretty.new( indent )
+ else
+ REXML::Formatters::Default.new
+ end
+ formatter.write( self, writer )
+ end
+
+ # FIXME
+ # This probably won't work properly
+ def xpath
+ path = @parent.xpath
+ path += "/text()"
+ return path
+ end
+
+ # Writes out text, substituting special characters beforehand.
+ # +out+ A String, IO, or any other object supporting <<( String )
+ # +input+ the text to substitute and the write out
+ #
+ # z=utf8.unpack("U*")
+ # ascOut=""
+ # z.each{|r|
+ # if r < 0x100
+ # ascOut.concat(r.chr)
+ # else
+ # ascOut.concat(sprintf("&#x%x;", r))
+ # end
+ # }
+ # puts ascOut
+ def write_with_substitution out, input
+ copy = input.clone # work on a copy so +input+ itself is left unmodified
+ # Doing it like this rather than in a loop improves the speed
+ copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )
+ copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )
+ copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )
+ copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )
+ copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )
+ copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )
+ out << copy
+ end
+
+ private
+ def clear_cache
+ @normalized = nil # forces to_s to recompute
+ @unnormalized = nil # forces value to recompute
+ end
+
+ # Reads text, substituting entities
+ def Text::read_with_substitution( input, illegal=nil )
+ copy = input.clone
+
+ if copy =~ illegal
+ raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
+ end if illegal # the whole check is skipped when no +illegal+ pattern is given
+
+ copy.gsub!( /\r\n?/, "\n" )
+ if copy.include? ?&
+ copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
+ copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
+ copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
+ copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
+ copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
+ copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) { # note: only lower-case hex digits are accepted by this pattern
+ m=$1
+ #m='0' if m==''
+ m = "0#{m}" if m[0] == ?x # "x41" becomes "0x41" so Integer() reads it as hexadecimal
+ [Integer(m)].pack('U*')
+ }
+ end
+ copy
+ end
+
+ EREFERENCE = /&(?!#{Entity::NAME};)/
+ # Escapes all possible entities
+ def Text::normalize( input, doctype=nil, entity_filter=nil )
+ copy = input.to_s
+ # Doing it like this rather than in a loop improves the speed
+ #copy = copy.gsub( EREFERENCE, '&amp;' )
+ copy = copy.gsub( "&", "&amp;" ) # every ampersand is escaped first
+ if doctype
+ # Replace each occurrence of an entity's value with a reference to that entity
+ doctype.entities.each_value do |entity|
+ copy = copy.gsub( entity.value,
+ "&#{entity.name};" ) if entity.value and
+ not( entity_filter and entity_filter.include?(entity.name) ) # entities named in +entity_filter+ are skipped
+ end
+ else
+ # Without a doctype only the default entities are substituted
+ DocType::DEFAULT_ENTITIES.each_value do |entity|
+ copy = copy.gsub(entity.value, "&#{entity.name};" )
+ end
+ end
+ copy
+ end
+
+ # Unescapes all possible entities
+ def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
+ sum = 0
+ string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
+ s = Text.expand($&, doctype, filter)
+ if sum + s.bytesize > Security.entity_expansion_text_limit
+ raise "entity expansion has grown too large"
+ else
+ sum += s.bytesize
+ end
+ s
+ }
+ end
+
+ def Text.expand(ref, doctype, filter)
+ if ref[1] == ?# # numeric character reference
+ if ref[2] == ?x
+ [ref[3...-1].to_i(16)].pack('U*')
+ else
+ [ref[2...-1].to_i].pack('U*')
+ end
+ elsif ref == '&amp;'
+ '&'
+ elsif filter and filter.include?( ref[1...-1] ) # names listed in +filter+ are left unexpanded
+ ref
+ elsif doctype
+ doctype.entity( ref[1...-1] ) or ref # reference is kept as-is when the doctype lookup returns nil/false
+ else
+ entity_value = DocType::DEFAULT_ENTITIES[ ref[1...-1] ]
+ entity_value ? entity_value.value : ref
+ end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/undefinednamespaceexception.rb b/jni/ruby/lib/rexml/undefinednamespaceexception.rb
new file mode 100644
index 0000000..8ebfdfd
--- /dev/null
+++ b/jni/ruby/lib/rexml/undefinednamespaceexception.rb
@@ -0,0 +1,8 @@
+require 'rexml/parseexception'
+module REXML
+ class UndefinedNamespaceException < ParseException
+ def initialize( prefix, source, parser ) # +source+ and +parser+ are accepted but not passed on to super
+ super( "Undefined prefix #{prefix} found" )
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/validation/relaxng.rb b/jni/ruby/lib/rexml/validation/relaxng.rb
new file mode 100644
index 0000000..370efd5
--- /dev/null
+++ b/jni/ruby/lib/rexml/validation/relaxng.rb
@@ -0,0 +1,538 @@
+require "rexml/validation/validation"
+require "rexml/parsers/baseparser"
+
+module REXML
+ module Validation
+ # Implemented:
+ # * empty
+ # * element
+ # * attribute
+ # * text
+ # * optional
+ # * choice
+ # * oneOrMore
+ # * zeroOrMore
+ # * group
+ # * value
+ # * interleave
+ # * mixed
+ # * ref
+ # * grammar
+ # * start
+ # * define
+ #
+ # Not implemented:
+ # * data
+ # * param
+ # * include
+ # * externalRef
+ # * notAllowed
+ # * anyName
+ # * nsName
+ # * except
+ # * name
+ class RelaxNG
+ include Validator
+
+ INFINITY = 1.0 / 0.0
+ EMPTY = Event.new( nil )
+ TEXT = [:start_element, "text"]
+ attr_accessor :current
+ attr_accessor :count
+ attr_reader :references
+
+ # FIXME: Namespaces
+ def initialize source
+ parser = REXML::Parsers::BaseParser.new( source )
+
+ @count = 0
+ @references = {}
+ @root = @current = Sequence.new(self)
+ @root.previous = true
+ states = [ @current ]
+ begin
+ event = parser.pull
+ case event[0]
+ when :start_element
+ case event[1]
+ when "empty"
+ when "element", "attribute", "text", "value"
+ states[-1] << event
+ when "optional"
+ states << Optional.new( self )
+ states[-2] << states[-1]
+ when "choice"
+ states << Choice.new( self )
+ states[-2] << states[-1]
+ when "oneOrMore"
+ states << OneOrMore.new( self )
+ states[-2] << states[-1]
+ when "zeroOrMore"
+ states << ZeroOrMore.new( self )
+ states[-2] << states[-1]
+ when "group"
+ states << Sequence.new( self )
+ states[-2] << states[-1]
+ when "interleave"
+ states << Interleave.new( self )
+ states[-2] << states[-1]
+ when "mixed"
+ states << Interleave.new( self )
+ states[-2] << states[-1]
+ states[-1] << TEXT
+ when "define"
+ states << [ event[2]["name"] ]
+ when "ref"
+ states[-1] << Ref.new( event[2]["name"] )
+ when "anyName"
+ states << AnyName.new( self )
+ states[-2] << states[-1]
+ when "nsName"
+ when "except"
+ when "name"
+ when "data"
+ when "param"
+ when "include"
+ when "grammar"
+ when "start"
+ when "externalRef"
+ when "notAllowed"
+ end
+ when :end_element
+ case event[1]
+ when "element", "attribute"
+ states[-1] << event
+ when "zeroOrMore", "oneOrMore", "choice", "optional",
+ "interleave", "group", "mixed"
+ states.pop
+ when "define"
+ ref = states.pop
+ @references[ ref.shift ] = ref
+ #when "empty"
+ end
+ when :end_document
+ states[-1] << event
+ when :text
+ states[-1] << event
+ end
+ end while event[0] != :end_document
+ end
+
+ def receive event
+ validate( event ) # hands the event straight to validate
+ end
+ end
+
+ class State
+ def initialize( context )
+ @previous = []
+ @events = []
+ @current = 0
+ @count = context.count += 1
+ @references = context.references
+ @value = false
+ end
+
+ def reset
+ return if @current == 0 # nothing consumed yet, so nested states are not visited either
+ @current = 0
+ @events.each {|s| s.reset if s.kind_of? State } # only nested State objects are reset
+ end
+
+ def previous=( previous )
+ @previous << previous # appended, not replaced: @previous is an array that #next pops from
+ end
+
+ def next( event )
+ #print "In next with #{event.inspect}. "
+ #p @previous
+ return @previous.pop.next( event ) if @events[@current].nil? # nothing left here: hand the event to the state we came from
+ expand_ref_in( @events, @current ) if @events[@current].class == Ref # replace a Ref by the events it refers to
+ if ( @events[@current].kind_of? State )
+ @current += 1
+ @events[@current-1].previous = self
+ return @events[@current-1].next( event ) # descend into the nested state with the same event
+ end
+ if ( @events[@current].matches?(event) )
+ @current += 1
+ if @events[@current].nil?
+ return @previous.pop # this state is finished
+ elsif @events[@current].kind_of? State
+ @current += 1
+ @events[@current-1].previous = self
+ return @events[@current-1] # the nested state receives the next event
+ else
+ return self
+ end
+ else
+ return nil # no match: Validator#validate raises when it gets nil back
+ end
+ end
+
+ def to_s
+ # Abbreviated:
+ self.class.name =~ /(?:::)(\w)\w+$/
+ # Full:
+ #self.class.name =~ /(?:::)(\w+)$/
+ "#$1.#@count"
+ end
+
+ def inspect
+ "< #{to_s} #{@events.collect{|e|
+ pre = e == @events[@current] ? '#' : ''
+ pre + e.inspect unless self == e
+ }.join(', ')} >"
+ end
+
+ def expected
+ return [@events[@current]]
+ end
+
+ def <<( event )
+ add_event_to_arry( @events, event )
+ end
+
+
+ protected
+ def expand_ref_in( arry, ind )
+ new_events = []
+ @references[ arry[ind].to_s ].each{ |evt|
+ add_event_to_arry(new_events,evt)
+ }
+ arry[ind,1] = new_events
+ end
+
+ def add_event_to_arry( arry, evt )
+ evt = generate_event( evt )
+ if evt.kind_of? String
+ arry[-1].event_arg = evt if arry[-1].kind_of? Event and @value
+ @value = false
+ else
+ arry << evt
+ end
+ end
+
+ def generate_event( event )
+ return event if event.kind_of? State or event.class == Ref
+ evt = nil
+ arg = nil
+ case event[0]
+ when :start_element
+ case event[1]
+ when "element"
+ evt = :start_element
+ arg = event[2]["name"]
+ when "attribute"
+ evt = :start_attribute
+ arg = event[2]["name"]
+ when "text"
+ evt = :text
+ when "value"
+ evt = :text
+ @value = true
+ end
+ when :text
+ return event[1]
+ when :end_document
+ return Event.new( event[0] )
+ else # then :end_element
+ case event[1]
+ when "element"
+ evt = :end_element
+ when "attribute"
+ evt = :end_attribute
+ end
+ end
+ return Event.new( evt, arg )
+ end
+ end
+
+
+ class Sequence < State
+ def matches?(event) # delegates to the event at the current position
+ @events[@current].matches?( event )
+ end
+ end
+
+
+ class Optional < State
+ def next( event )
+ if @current == 0
+ rv = super
+ return rv if rv
+ @prior = @previous.pop # nothing inside matched: step out and let the enclosing state try the event
+ return @prior.next( event )
+ end
+ super
+ end
+
+ def matches?(event)
+ @events[@current].matches?(event) ||
+ (@current == 0 and @previous[-1].matches?(event))
+ end
+
+ def expected
+ return [ @prior.expected, @events[0] ].flatten if @current == 0 # NOTE(review): @prior is only assigned inside #next - confirm it cannot be nil here
+ return [@events[@current]]
+ end
+ end
+
+
+ class ZeroOrMore < Optional
+ def next( event )
+ expand_ref_in( @events, @current ) if @events[@current].class == Ref
+ if ( @events[@current].matches?(event) )
+ @current += 1
+ if @events[@current].nil?
+ @current = 0 # end of the body reached: wrap around so it can repeat
+ return self
+ elsif @events[@current].kind_of? State
+ @current += 1
+ @events[@current-1].previous = self
+ return @events[@current-1]
+ else
+ return self
+ end
+ else
+ @prior = @previous.pop # popped even when the next line does not use it (i.e. when @current != 0)
+ return @prior.next( event ) if @current == 0
+ return nil
+ end
+ end
+
+ def expected
+ return [ @prior.expected, @events[0] ].flatten if @current == 0 # NOTE(review): @prior is only assigned inside #next - confirm it cannot be nil here
+ return [@events[@current]]
+ end
+ end
+
+
+ class OneOrMore < State
+ def initialize context
+ super
+ @ord = 0 # number of events matched so far; reset sets it back to 0
+ end
+
+ def reset
+ super
+ @ord = 0
+ end
+
+ def next( event )
+ expand_ref_in( @events, @current ) if @events[@current].class == Ref
+ if ( @events[@current].matches?(event) )
+ @current += 1
+ @ord += 1
+ if @events[@current].nil?
+ @current = 0 # end of the body reached: wrap around so it can repeat
+ return self
+ elsif @events[@current].kind_of? State
+ @current += 1
+ @events[@current-1].previous = self
+ return @events[@current-1]
+ else
+ return self
+ end
+ else
+ return @previous.pop.next( event ) if @current == 0 and @ord > 0 # leaving is only allowed after at least one match
+ return nil
+ end
+ end
+
+ def matches?( event )
+ @events[@current].matches?(event) ||
+ (@current == 0 and @ord > 0 and @previous[-1].matches?(event))
+ end
+
+ def expected
+ if @current == 0 and @ord > 0
+ return [@previous[-1].expected, @events[0]].flatten
+ else
+ return [@events[@current]]
+ end
+ end
+ end
+
+
+ class Choice < State
+ def initialize context
+ super
+ @choices = []
+ end
+
+ def reset
+ super
+ @events = [] # forget the alternative that was selected in #next
+ @choices.each { |c| c.each { |s| s.reset if s.kind_of? State } } # reset nested states in every alternative
+ end
+
+ def <<( event )
+ add_event_to_arry( @choices, event )
+ end
+
+ def next( event )
+ # Make the choice if we haven't
+ if @events.size == 0
+ c = 0 ; max = @choices.size
+ while c < max
+ if @choices[c][0].class == Ref
+ expand_ref_in( @choices[c], 0 )
+ @choices += @choices[c]
+ @choices.delete( @choices[c] )
+ max -= 1
+ else
+ c += 1
+ end
+ end
+ @events = @choices.find { |evt| evt[0].matches? event }
+ # Remove the references
+ # Find the events
+ end
+ unless @events
+ @events = []
+ return nil
+ end
+ super
+ end
+
+ def matches?( event )
+ return @events[@current].matches?( event ) if @events.size > 0
+ !@choices.find{|evt| evt[0].matches?(event)}.nil?
+ end
+
+ def expected
+ return [@events[@current]] if @events.size > 0
+ return @choices.collect do |x|
+ if x[0].kind_of? State
+ x[0].expected
+ else
+ x[0]
+ end
+ end.flatten
+ end
+
+ def inspect
+ "< #{to_s} #{@choices.collect{|e| e.collect{|f|f.to_s}.join(', ')}.join(' or ')} >"
+ end
+
+ protected
+ def add_event_to_arry( arry, evt )
+ if evt.kind_of? State or evt.class == Ref
+ arry << [evt]
+ elsif evt[0] == :text
+ if arry[-1] and
+ arry[-1][-1].kind_of?( Event ) and
+ arry[-1][-1].event_type == :text and @value
+
+ arry[-1][-1].event_arg = evt[1]
+ @value = false
+ end
+ else
+ arry << [] if evt[0] == :start_element
+ arry[-1] << generate_event( evt )
+ end
+ end
+ end
+
+
+ class Interleave < Choice
+ def initialize context
+ super
+ @choice = 0
+ end
+
+ def reset # NOTE(review): does not call super, so @current and @events keep their values - confirm this is intended
+ @choice = 0
+ end
+
+ def next_current( event )
+ # Expand references
+ c = 0 ; max = @choices.size
+ while c < max
+ if @choices[c][0].class == Ref
+ expand_ref_in( @choices[c], 0 )
+ @choices += @choices[c]
+ @choices.delete( @choices[c] )
+ max -= 1
+ else
+ c += 1
+ end
+ end
+ @events = @choices[@choice..-1].find { |evt| evt[0].matches? event }
+ @current = 0
+ if @events
+ # reorder the choices
+ old = @choices[@choice]
+ idx = @choices.index( @events )
+ @choices[@choice] = @events
+ @choices[idx] = old
+ @choice += 1
+ end
+
+ @events = [] unless @events
+ end
+
+
+ def next( event )
+ # Find the next series
+ next_current(event) unless @events[@current]
+ return nil unless @events[@current]
+
+ expand_ref_in( @events, @current ) if @events[@current].class == Ref
+ if ( @events[@current].kind_of? State )
+ @current += 1
+ @events[@current-1].previous = self
+ return @events[@current-1].next( event )
+ end
+ return @previous.pop.next( event ) if @events[@current].nil?
+ if ( @events[@current].matches?(event) )
+ @current += 1
+ if @events[@current].nil?
+ return self unless @choices[@choice].nil?
+ return @previous.pop
+ elsif @events[@current].kind_of? State
+ @current += 1
+ @events[@current-1].previous = self
+ return @events[@current-1]
+ else
+ return self
+ end
+ else
+ return nil
+ end
+ end
+
+ def matches?( event )
+ return @events[@current].matches?( event ) if @events[@current]
+ !@choices[@choice..-1].find{|evt| evt[0].matches?(event)}.nil?
+ end
+
+ def expected
+ return [@events[@current]] if @events[@current]
+ return @choices[@choice..-1].collect do |x|
+ if x[0].kind_of? State
+ x[0].expected
+ else
+ x[0]
+ end
+ end.flatten
+ end
+
+ def inspect
+ "< #{to_s} #{@choices.collect{|e| e.collect{|f|f.to_s}.join(', ')}.join(' and ')} >"
+ end
+ end
+
+ class Ref
+ def initialize value
+ @value = value # State#expand_ref_in looks this up in @references via to_s
+ end
+ def to_s
+ @value
+ end
+ def inspect
+ "{#{to_s}}"
+ end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/validation/validation.rb b/jni/ruby/lib/rexml/validation/validation.rb
new file mode 100644
index 0000000..bab7f22
--- /dev/null
+++ b/jni/ruby/lib/rexml/validation/validation.rb
@@ -0,0 +1,143 @@
+require 'rexml/validation/validationexception'
+
+module REXML
+ module Validation
+ # Mixin that feeds parser events to a tree of validation states. It works
+ # on the instance variables @root and @current of the including object.
+ module Validator
+   NILEVENT = [ nil ]
+
+   # Restarts validation at the root state and forgets pending attributes.
+   # Returns self.
+   def reset
+     @attr_stack = []
+     @current = @root
+     @root.reset
+     @root.previous = true
+     self
+   end
+
+   # Prints the inspect form of the root state.
+   def dump
+     puts @root.inspect
+   end
+
+   # Feeds one event (an Array whose first entry is the event type) to the
+   # current state.
+   #
+   # For :start_element the attribute Hash in event[2] is validated too;
+   # every accepted attribute is deleted from that Hash. For :end_element
+   # the attributes left over from the matching start are checked.
+   #
+   # Raises ValidationException when the event is rejected or when
+   # attributes remain unconsumed at the end of an element.
+   def validate( event )
+     @attr_stack = [] unless defined? @attr_stack
+     match = @current.next(event)
+     unless match
+       raise ValidationException.new( "Validation error. Expected: "+
+         @current.expected.join( " or " )+" from #{@current.inspect} "+
+         " but got #{Event.new( event[0], event[1] ).inspect}" )
+     end
+     @current = match
+
+     # Check for attributes
+     case event[0]
+     when :start_element
+       @attr_stack << event[2]
+       begin
+         start_attr = [:start_attribute, nil]
+         end_attr = [:end_attribute]
+         text = [:text, nil]
+         accepted, = event[2].find { |key, value|
+           start_attr[1] = key
+           state = @current.next( start_attr )
+           next false unless state
+           # If the state has text children...
+           unless state.matches?( end_attr )
+             text[1] = value
+             state = state.next( text )
+             text[1] = nil
+             # NOTE(review): this return leaves validate itself, not just
+             # the block - confirm that is intended
+             return false unless state
+           end
+           @current = state
+           closed = @current.next( end_attr )
+           next false unless closed
+           @current = closed
+           true
+         }
+         event[2].delete(accepted) if accepted
+       end while accepted
+     when :end_element
+       attrs = @attr_stack.pop
+       if attrs.length > 0
+         raise ValidationException.new( "Validation error. Illegal "+
+           " attributes: #{attrs.inspect}")
+       end
+     end
+   end
+ end
+
+ class Event
+ def initialize(event_type, event_arg=nil )
+ @event_type = event_type
+ @event_arg = event_arg
+ end
+
+ attr_reader :event_type
+ attr_accessor :event_arg
+
+ def done?
+ @done
+ end
+
+ def single?
+ return (@event_type != :start_element and @event_type != :start_attribute)
+ end
+
+ def matches?( event )
+ return false unless event[0] == @event_type
+ case event[0]
+ when nil
+ return true
+ when :start_element
+ return true if event[1] == @event_arg
+ when :end_element
+ return true
+ when :start_attribute
+ return true if event[1] == @event_arg
+ when :end_attribute
+ return true
+ when :end_document
+ return true
+ when :text
+ return (@event_arg.nil? or @event_arg == event[1])
+=begin
+ when :processing_instruction
+ false
+ when :xmldecl
+ false
+ when :start_doctype
+ false
+ when :end_doctype
+ false
+ when :externalentity
+ false
+ when :elementdecl
+ false
+ when :entity
+ false
+ when :attlistdecl
+ false
+ when :notationdecl
+ false
+ when :end_doctype
+ false
+=end
+ else
+ false
+ end
+ end
+
+ def ==( other )
+ return false unless other.kind_of? Event
+ @event_type == other.event_type and @event_arg == other.event_arg
+ end
+
+ def to_s
+ inspect
+ end
+
+ def inspect
+ "#{@event_type.inspect}( #@event_arg )"
+ end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/validation/validationexception.rb b/jni/ruby/lib/rexml/validation/validationexception.rb
new file mode 100644
index 0000000..4723d9e
--- /dev/null
+++ b/jni/ruby/lib/rexml/validation/validationexception.rb
@@ -0,0 +1,9 @@
+module REXML
+ module Validation
+ class ValidationException < RuntimeError
+ def initialize msg
+ super
+ end
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/xmldecl.rb b/jni/ruby/lib/rexml/xmldecl.rb
new file mode 100644
index 0000000..465e6ab
--- /dev/null
+++ b/jni/ruby/lib/rexml/xmldecl.rb
@@ -0,0 +1,115 @@
+require 'rexml/encoding'
+require 'rexml/source'
+
+module REXML
+ # Represents the XML declaration (<?xml version='...' ... ?>) of a document.
+ class XMLDecl < Child
+ include Encoding
+
+ DEFAULT_VERSION = "1.0";
+ DEFAULT_ENCODING = "UTF-8";
+ DEFAULT_STANDALONE = "no";
+ START = '<\?xml';
+ STOP = '\?>';
+
+ attr_accessor :version, :standalone
+ attr_reader :writeencoding, :writethis
+
+ def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
+ @writethis = true
+ @writeencoding = !encoding.nil?
+ if version.kind_of? XMLDecl
+ super()
+ @version = version.version
+ self.encoding = version.encoding
+ @writeencoding = version.writeencoding
+ @standalone = version.standalone
+ else
+ super()
+ @version = version
+ self.encoding = encoding
+ @standalone = standalone
+ end
+ @version = DEFAULT_VERSION if @version.nil?
+ end
+
+ def clone
+ XMLDecl.new(self)
+ end
+
+ # indent::
+ # Ignored. There must be no whitespace before an XML declaration
+ # transitive::
+ # Ignored
+ # ie_hack::
+ # Ignored
+ def write(writer, indent=-1, transitive=false, ie_hack=false)
+ return nil unless @writethis or writer.kind_of? Output
+ writer << START.sub(/\\/u, '')
+ writer << " #{content encoding}"
+ writer << STOP.sub(/\\/u, '')
+ end
+
+ def ==( other )
+ other.kind_of?(XMLDecl) and
+ other.version == @version and
+ other.encoding == self.encoding and
+ other.standalone == @standalone
+ end
+
+ def xmldecl version, encoding, standalone
+ @version = version
+ self.encoding = encoding
+ @standalone = standalone
+ end
+
+ def node_type
+ :xmldecl
+ end
+
+ alias :stand_alone? :standalone
+ alias :old_enc= :encoding=
+
+ def encoding=( enc )
+ if enc.nil?
+ self.old_enc = "UTF-8"
+ @writeencoding = false
+ else
+ self.old_enc = enc
+ @writeencoding = true
+ end
+ self.dowrite
+ end
+
+ # Only use this if you do not want the XML declaration to be written;
+ # this object is ignored by the XML writer. Otherwise, instantiate your
+ # own XMLDecl and add it to the document.
+ #
+ # Note that XML 1.1 documents *must* include an XML declaration
+ def XMLDecl.default
+ rv = XMLDecl.new( "1.0" )
+ rv.nowrite
+ rv
+ end
+
+ def nowrite
+ @writethis = false
+ end
+
+ def dowrite
+ @writethis = true
+ end
+
+ def inspect
+ START.sub(/\\/u, '') + " ... " + STOP.sub(/\\/u, '')
+ end
+
+ private
+ def content(enc)
+ rv = "version='#@version'"
+ rv << " encoding='#{enc}'" if @writeencoding || enc !~ /\Autf-8\z/i
+ rv << " standalone='#@standalone'" if @standalone
+ rv
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/xmltokens.rb b/jni/ruby/lib/rexml/xmltokens.rb
new file mode 100644
index 0000000..4d4dd27
--- /dev/null
+++ b/jni/ruby/lib/rexml/xmltokens.rb
@@ -0,0 +1,84 @@
+module REXML
+ # Defines a number of tokens used for parsing XML. Not for general
+ # consumption.
+ module XMLTokens
+ # From http://www.w3.org/TR/REC-xml/#sec-common-syn
+ #
+ # [4] NameStartChar ::=
+ # ":" |
+ # [A-Z] |
+ # "_" |
+ # [a-z] |
+ # [#xC0-#xD6] |
+ # [#xD8-#xF6] |
+ # [#xF8-#x2FF] |
+ # [#x370-#x37D] |
+ # [#x37F-#x1FFF] |
+ # [#x200C-#x200D] |
+ # [#x2070-#x218F] |
+ # [#x2C00-#x2FEF] |
+ # [#x3001-#xD7FF] |
+ # [#xF900-#xFDCF] |
+ # [#xFDF0-#xFFFD] |
+ # [#x10000-#xEFFFF]
+ name_start_chars = [
+ ":",
+ "A-Z",
+ "_",
+ "a-z",
+ "\\u00C0-\\u00D6",
+ "\\u00D8-\\u00F6",
+ "\\u00F8-\\u02FF",
+ "\\u0370-\\u037D",
+ "\\u037F-\\u1FFF",
+ "\\u200C-\\u200D",
+ "\\u2070-\\u218F",
+ "\\u2C00-\\u2FEF",
+ "\\u3001-\\uD7FF",
+ "\\uF900-\\uFDCF",
+ "\\uFDF0-\\uFFFD",
+ "\\u{10000}-\\u{EFFFF}",
+ ]
+ # From http://www.w3.org/TR/REC-xml/#sec-common-syn
+ #
+ # [4a] NameChar ::=
+ # NameStartChar |
+ # "-" |
+ # "." |
+ # [0-9] |
+ # #xB7 |
+ # [#x0300-#x036F] |
+ # [#x203F-#x2040]
+ name_chars = name_start_chars + [
+ "\\-",
+ "\\.",
+ "0-9",
+ "\\u00B7",
+ "\\u0300-\\u036F",
+ "\\u203F-\\u2040",
+ ]
+ NAME_START_CHAR = "[#{name_start_chars.join('')}]"
+ NAME_CHAR = "[#{name_chars.join('')}]"
+ NAMECHAR = NAME_CHAR # deprecated. Use NAME_CHAR instead.
+
+ # From http://www.w3.org/TR/xml-names11/#NT-NCName
+ #
+ # [6] NCNameStartChar ::= NameStartChar - ':'
+ ncname_start_chars = name_start_chars - [":"]
+ # From http://www.w3.org/TR/xml-names11/#NT-NCName
+ #
+ # [5] NCNameChar ::= NameChar - ':'
+ ncname_chars = name_chars - [":"]
+ NCNAME_STR = "[#{ncname_start_chars.join('')}][#{ncname_chars.join('')}]*"
+ NAME_STR = "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
+
+ NAME = "(#{NAME_START_CHAR}#{NAME_CHAR}*)"
+ NMTOKEN = "(?:#{NAME_CHAR})+"
+ NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
+ REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
+
+ #REFERENCE = "(?:#{ENTITYREF}|#{CHARREF})"
+ #ENTITYREF = "&#{NAME};"
+ #CHARREF = "&#\\d+;|&#x[0-9a-fA-F]+;"
+ end
+end
diff --git a/jni/ruby/lib/rexml/xpath.rb b/jni/ruby/lib/rexml/xpath.rb
new file mode 100644
index 0000000..0f99808
--- /dev/null
+++ b/jni/ruby/lib/rexml/xpath.rb
@@ -0,0 +1,80 @@
+require 'rexml/functions'
+require 'rexml/xpath_parser'
+
+module REXML
+ # Wrapper class. Use this class to access the XPath functions.
+ class XPath
+ include Functions
+ # A base Hash object, supposing to be used when initializing a
+ # default empty namespaces set, but is currently unused.
+ # TODO: either set the namespaces=EMPTY_HASH, or deprecate this.
+ EMPTY_HASH = {}
+
+ # Finds and returns the first node that matches the supplied xpath.
+ # element::
+ # The context element
+ # path::
+ # The xpath to search for. If not supplied or nil, returns the first
+ # node matching '*'.
+ # namespaces::
+ # If supplied, a Hash which defines a namespace mapping.
+ # variables::
+ # If supplied, a Hash which maps $variables in the query
+ # to values. This can be used to avoid XPath injection attacks
+ # or to automatically handle escaping string values.
+ #
+ # XPath.first( node )
+ # XPath.first( doc, "//b"} )
+ # XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
+ # XPath.first( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"})
+ def XPath::first element, path=nil, namespaces=nil, variables={}
+ raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash)
+ raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash)
+ parser = XPathParser.new
+ parser.namespaces = namespaces
+ parser.variables = variables
+ path = "*" unless path
+ element = [element] unless element.kind_of? Array
+ parser.parse(path, element).flatten[0]
+ end
+
+ # Iterates over nodes that match the given path, calling the supplied
+ # block with the match.
+ # element::
+ # The context element
+ # path::
+ # The xpath to search for. If not supplied or nil, defaults to '*'
+ # namespaces::
+ # If supplied, a Hash which defines a namespace mapping
+ # variables::
+ # If supplied, a Hash which maps $variables in the query
+ # to values. This can be used to avoid XPath injection attacks
+ # or to automatically handle escaping string values.
+ #
+ # XPath.each( node ) { |el| ... }
+ # XPath.each( node, '/*[@attr='v']' ) { |el| ... }
+ # XPath.each( node, 'ancestor::x' ) { |el| ... }
+ # XPath.each( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"}) \
+ # {|el| ... }
+ def XPath::each element, path=nil, namespaces=nil, variables={}, &block
+ raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash)
+ raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash)
+ parser = XPathParser.new
+ parser.namespaces = namespaces
+ parser.variables = variables
+ path = "*" unless path
+ element = [element] unless element.kind_of? Array
+ parser.parse(path, element).each( &block )
+ end
+
+ # Returns an array of nodes matching a given XPath.
+ def XPath::match element, path=nil, namespaces=nil, variables={}
+ parser = XPathParser.new
+ parser.namespaces = namespaces
+ parser.variables = variables
+ path = "*" unless path
+ element = [element] unless element.kind_of? Array
+ parser.parse(path,element)
+ end
+ end
+end
diff --git a/jni/ruby/lib/rexml/xpath_parser.rb b/jni/ruby/lib/rexml/xpath_parser.rb
new file mode 100644
index 0000000..ef49a32
--- /dev/null
+++ b/jni/ruby/lib/rexml/xpath_parser.rb
@@ -0,0 +1,703 @@
+require 'rexml/namespace'
+require 'rexml/xmltokens'
+require 'rexml/attribute'
+require 'rexml/syncenumerator'
+require 'rexml/parsers/xpathparser'
+
+class Object
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object types
+ def dclone
+ clone
+ end
+end
+class Symbol
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object types
+ def dclone ; self ; end
+end
+class Fixnum
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object types
+ def dclone ; self ; end
+end
+class Float
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object types
+ def dclone ; self ; end
+end
+class Array
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object+ types
+ def dclone
+ klone = self.clone
+ klone.clear
+ self.each{|v| klone << v.dclone}
+ klone
+ end
+end
+
+module REXML
+ # You don't want to use this class. Really. Use XPath, which is a wrapper
+ # for this class. Believe me. You don't want to poke around in here.
+ # There is strange, dark magic at work in this code. Beware. Go back! Go
+ # back while you still can!
+ class XPathParser
+ include XMLTokens
+ LITERAL = /^'([^']*)'|^"([^"]*)"/u
+
+ def initialize( )
+ @parser = REXML::Parsers::XPathParser.new
+ @namespaces = nil
+ @variables = {}
+ end
+
+ def namespaces=( namespaces={} )
+ Functions::namespace_context = namespaces
+ @namespaces = namespaces
+ end
+
+ def variables=( vars={} )
+ Functions::variables = vars
+ @variables = vars
+ end
+
+ def parse path, nodeset
+ path_stack = @parser.parse( path )
+ match( path_stack, nodeset )
+ end
+
+ def get_first path, nodeset
+ path_stack = @parser.parse( path )
+ first( path_stack, nodeset )
+ end
+
+ def predicate path, nodeset
+ path_stack = @parser.parse( path )
+ expr( path_stack, nodeset )
+ end
+
+ def []=( variable_name, value )
+ @variables[ variable_name ] = value
+ end
+
+
+ # Performs a depth-first (document order) XPath search, and returns the
+ # first match. This is the fastest, lightest way to return a single result.
+ #
+ # FIXME: This method is incomplete!
+ def first( path_stack, node )
+ return nil if path.size == 0
+
+ case path[0]
+ when :document
+ # do nothing
+ return first( path[1..-1], node )
+ when :child
+ for c in node.children
+ r = first( path[1..-1], c )
+ return r if r
+ end
+ when :qname
+ name = path[2]
+ if node.name == name
+ return node if path.size == 3
+ return first( path[3..-1], node )
+ else
+ return nil
+ end
+ when :descendant_or_self
+ r = first( path[1..-1], node )
+ return r if r
+ for c in node.children
+ r = first( path, c )
+ return r if r
+ end
+ when :node
+ return first( path[1..-1], node )
+ when :any
+ return first( path[1..-1], node )
+ end
+ return nil
+ end
+
+
+ def match( path_stack, nodeset )
+ r = expr( path_stack, nodeset )
+ r
+ end
+
+ private
+
+
+ # Returns a String namespace for a node, given a prefix
+ # The rules are:
+ #
+ # 1. Use the supplied namespace mapping first.
+ # 2. If no mapping was supplied, use the context node to look up the namespace
+ def get_namespace( node, prefix )
+ if @namespaces
+ return @namespaces[prefix] || ''
+ else
+ return node.namespace( prefix ) if node.node_type == :element
+ return ''
+ end
+ end
+
+
+ # Expr takes a stack of path elements and a set of nodes (either a Parent
+ # or an Array) and returns an Array of matching nodes
+ ALL = [ :attribute, :element, :text, :processing_instruction, :comment ]
+ ELEMENTS = [ :element ]
+ def expr( path_stack, nodeset, context=nil )
+ node_types = ELEMENTS
+ return nodeset if path_stack.length == 0 || nodeset.length == 0
+ while path_stack.length > 0
+ if nodeset.length == 0
+ path_stack.clear
+ return []
+ end
+ case (op = path_stack.shift)
+ when :document
+ nodeset = [ nodeset[0].root_node ]
+
+ when :qname
+ prefix = path_stack.shift
+ name = path_stack.shift
+ nodeset.delete_if do |node|
+ # FIXME: This DOUBLES the time XPath searches take
+ ns = get_namespace( node, prefix )
+ if node.node_type == :element
+ if node.name == name
+ end
+ end
+ !(node.node_type == :element and
+ node.name == name and
+ node.namespace == ns )
+ end
+ node_types = ELEMENTS
+
+ when :any
+ nodeset.delete_if { |node| !node_types.include?(node.node_type) }
+
+ when :self
+ # This space left intentionally blank
+
+ when :processing_instruction
+ target = path_stack.shift
+ nodeset.delete_if do |node|
+ (node.node_type != :processing_instruction) or
+ ( target!='' and ( node.target != target ) )
+ end
+
+ when :text
+ nodeset.delete_if { |node| node.node_type != :text }
+
+ when :comment
+ nodeset.delete_if { |node| node.node_type != :comment }
+
+ when :node
+ # This space left intentionally blank
+ node_types = ALL
+
+ when :child
+ new_nodeset = []
+ nt = nil
+ nodeset.each do |node|
+ nt = node.node_type
+ new_nodeset += node.children if nt == :element or nt == :document
+ end
+ nodeset = new_nodeset
+ node_types = ELEMENTS
+
+ when :literal
+ return path_stack.shift
+
+ when :attribute
+ new_nodeset = []
+ case path_stack.shift
+ when :qname
+ prefix = path_stack.shift
+ name = path_stack.shift
+ for element in nodeset
+ if element.node_type == :element
+ attrib = element.attribute( name, get_namespace(element, prefix) )
+ new_nodeset << attrib if attrib
+ end
+ end
+ when :any
+ for element in nodeset
+ if element.node_type == :element
+ new_nodeset += element.attributes.to_a
+ end
+ end
+ end
+ nodeset = new_nodeset
+
+ when :parent
+ nodeset = nodeset.collect{|n| n.parent}.compact
+ #nodeset = expr(path_stack.dclone, nodeset.collect{|n| n.parent}.compact)
+ node_types = ELEMENTS
+
+ when :ancestor
+ new_nodeset = []
+ nodeset.each do |node|
+ while node.parent
+ node = node.parent
+ new_nodeset << node unless new_nodeset.include? node
+ end
+ end
+ nodeset = new_nodeset
+ node_types = ELEMENTS
+
+ when :ancestor_or_self
+ new_nodeset = []
+ nodeset.each do |node|
+ if node.node_type == :element
+ new_nodeset << node
+ while ( node.parent )
+ node = node.parent
+ new_nodeset << node unless new_nodeset.include? node
+ end
+ end
+ end
+ nodeset = new_nodeset
+ node_types = ELEMENTS
+
+ when :predicate
+ new_nodeset = []
+ subcontext = { :size => nodeset.size }
+ pred = path_stack.shift
+ nodeset.each_with_index { |node, index|
+ subcontext[ :node ] = node
+ subcontext[ :index ] = index+1
+ pc = pred.dclone
+ result = expr( pc, [node], subcontext )
+ result = result[0] if result.kind_of? Array and result.length == 1
+ if result.kind_of? Numeric
+ new_nodeset << node if result == (index+1)
+ elsif result.instance_of? Array
+ if result.size > 0 and result.inject(false) {|k,s| s or k}
+ new_nodeset << node if result.size > 0
+ end
+ else
+ new_nodeset << node if result
+ end
+ }
+ nodeset = new_nodeset
+=begin
+ predicate = path_stack.shift
+ ns = nodeset.clone
+ result = expr( predicate, ns )
+ if result.kind_of? Array
+ nodeset = result.zip(ns).collect{|m,n| n if m}.compact
+ else
+ nodeset = result ? nodeset : []
+ end
+=end
+
+ when :descendant_or_self
+ rv = descendant_or_self( path_stack, nodeset )
+ path_stack.clear
+ nodeset = rv
+ node_types = ELEMENTS
+
+ when :descendant
+ results = []
+ nt = nil
+ nodeset.each do |node|
+ nt = node.node_type
+ results += expr( path_stack.dclone.unshift( :descendant_or_self ),
+ node.children ) if nt == :element or nt == :document
+ end
+ nodeset = results
+ node_types = ELEMENTS
+
+ when :following_sibling
+ results = []
+ nodeset.each do |node|
+ next if node.parent.nil?
+ all_siblings = node.parent.children
+ current_index = all_siblings.index( node )
+ following_siblings = all_siblings[ current_index+1 .. -1 ]
+ results += expr( path_stack.dclone, following_siblings )
+ end
+ nodeset = results
+
+ when :preceding_sibling
+ results = []
+ nodeset.each do |node|
+ next if node.parent.nil?
+ all_siblings = node.parent.children
+ current_index = all_siblings.index( node )
+ preceding_siblings = all_siblings[ 0, current_index ].reverse
+ results += preceding_siblings
+ end
+ nodeset = results
+ node_types = ELEMENTS
+
+ when :preceding
+ new_nodeset = []
+ nodeset.each do |node|
+ new_nodeset += preceding( node )
+ end
+ nodeset = new_nodeset
+ node_types = ELEMENTS
+
+ when :following
+ new_nodeset = []
+ nodeset.each do |node|
+ new_nodeset += following( node )
+ end
+ nodeset = new_nodeset
+ node_types = ELEMENTS
+
+ when :namespace
+ new_nodeset = []
+ prefix = path_stack.shift
+ nodeset.each do |node|
+ if (node.node_type == :element or node.node_type == :attribute)
+ if @namespaces
+ namespaces = @namespaces
+ elsif (node.node_type == :element)
+ namespaces = node.namespaces
+ else
+ namespaces = node.element.namesapces
+ end
+ if (node.namespace == namespaces[prefix])
+ new_nodeset << node
+ end
+ end
+ end
+ nodeset = new_nodeset
+
+ when :variable
+ var_name = path_stack.shift
+ return @variables[ var_name ]
+
+ # :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
+ # TODO: Special case for :or and :and -- not evaluate the right
+ # operand if the left alone determines result (i.e. is true for
+ # :or and false for :and).
+ when :eq, :neq, :lt, :lteq, :gt, :gteq, :or
+ left = expr( path_stack.shift, nodeset.dup, context )
+ right = expr( path_stack.shift, nodeset.dup, context )
+ res = equality_relational_compare( left, op, right )
+ return res
+
+ when :and
+ left = expr( path_stack.shift, nodeset.dup, context )
+ return [] unless left
+ if left.respond_to?(:inject) and !left.inject(false) {|a,b| a | b}
+ return []
+ end
+ right = expr( path_stack.shift, nodeset.dup, context )
+ res = equality_relational_compare( left, op, right )
+ return res
+
+ when :div
+ left = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
+ right = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
+ return (left / right)
+
+ when :mod
+ left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
+ right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
+ return (left % right)
+
+ when :mult
+ left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
+ right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
+ return (left * right)
+
+ when :plus
+ left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
+ right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
+ return (left + right)
+
+ when :minus
+ left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
+ right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
+ return (left - right)
+
+ when :union
+ left = expr( path_stack.shift, nodeset, context )
+ right = expr( path_stack.shift, nodeset, context )
+ return (left | right)
+
+ when :neg
+ res = expr( path_stack, nodeset, context )
+ return -(res.to_f)
+
+ when :not
+ when :function
+ func_name = path_stack.shift.tr('-','_')
+ arguments = path_stack.shift
+ subcontext = context ? nil : { :size => nodeset.size }
+
+ res = []
+ cont = context
+ nodeset.each_with_index { |n, i|
+ if subcontext
+ subcontext[:node] = n
+ subcontext[:index] = i
+ cont = subcontext
+ end
+ arg_clone = arguments.dclone
+ args = arg_clone.collect { |arg|
+ expr( arg, [n], cont )
+ }
+ Functions.context = cont
+ res << Functions.send( func_name, *args )
+ }
+ return res
+
+ end
+ end # while
+ return nodeset
+ end
+
+
+ ##########################################################
+ # FIXME
+ # The next two methods are BAD MOJO!
+ # This is my achilles heel. If anybody thinks of a better
+ # way of doing this, be my guest. This really sucks, but
+ # it is a wonder it works at all.
+ # ########################################################
+
+ def descendant_or_self( path_stack, nodeset )
+ rs = []
+ d_o_s( path_stack, nodeset, rs )
+ document_order(rs.flatten.compact)
+ #rs.flatten.compact
+ end
+
+ def d_o_s( p, ns, r )
+ nt = nil
+ ns.each_index do |i|
+ n = ns[i]
+ x = expr( p.dclone, [ n ] )
+ nt = n.node_type
+ d_o_s( p, n.children, x ) if nt == :element or nt == :document and n.children.size > 0
+ r.concat(x) if x.size > 0
+ end
+ end
+
+
+ # Reorders an array of nodes so that they are in document order
+ # It tries to do this efficiently.
+ #
+ # FIXME: I need to get rid of this, but the issue is that most of the XPath
+ # interpreter functions as a filter, which means that we lose context going
+ # in and out of function calls. If I knew what the index of the nodes was,
+ # I wouldn't have to do this. Maybe add a document IDX for each node?
+ # Problems with mutable documents. Or, rewrite everything.
+ def document_order( array_of_nodes )
+ new_arry = []
+ array_of_nodes.each { |node|
+ node_idx = []
+ np = node.node_type == :attribute ? node.element : node
+ while np.parent and np.parent.node_type == :element
+ node_idx << np.parent.index( np )
+ np = np.parent
+ end
+ new_arry << [ node_idx.reverse, node ]
+ }
+ new_arry.sort{ |s1, s2| s1[0] <=> s2[0] }.collect{ |s| s[1] }
+ end
+
+
+ def recurse( nodeset, &block )
+ for node in nodeset
+ yield node
+ recurse( node, &block ) if node.node_type == :element
+ end
+ end
+
+
+
+ # Builds a nodeset of all of the preceding nodes of the supplied node,
+ # in reverse document order
+ # preceding:: includes every element in the document that precedes this node,
+ # except for ancestors
+ def preceding( node )
+ ancestors = []
+ p = node.parent
+ while p
+ ancestors << p
+ p = p.parent
+ end
+
+ acc = []
+ p = preceding_node_of( node )
+ while p
+ if ancestors.include? p
+ ancestors.delete(p)
+ else
+ acc << p
+ end
+ p = preceding_node_of( p )
+ end
+ acc
+ end
+
+ def preceding_node_of( node )
+ psn = node.previous_sibling_node
+ if psn.nil?
+ if node.parent.nil? or node.parent.class == Document
+ return nil
+ end
+ return node.parent
+ #psn = preceding_node_of( node.parent )
+ end
+ while psn and psn.kind_of? Element and psn.children.size > 0
+ psn = psn.children[-1]
+ end
+ psn
+ end
+
+ def following( node )
+ acc = []
+ p = next_sibling_node( node )
+ while p
+ acc << p
+ p = following_node_of( p )
+ end
+ acc
+ end
+
+ def following_node_of( node )
+ if node.kind_of? Element and node.children.size > 0
+ return node.children[0]
+ end
+ return next_sibling_node(node)
+ end
+
+ def next_sibling_node(node)
+ psn = node.next_sibling_node
+ while psn.nil?
+ if node.parent.nil? or node.parent.class == Document
+ return nil
+ end
+ node = node.parent
+ psn = node.next_sibling_node
+ end
+ return psn
+ end
+
+ def norm b
+ case b
+ when true, false
+ return b
+ when 'true', 'false'
+ return Functions::boolean( b )
+ when /^\d+(\.\d+)?$/
+ return Functions::number( b )
+ else
+ return Functions::string( b )
+ end
+ end
+
+ def equality_relational_compare( set1, op, set2 )
+ if set1.kind_of? Array and set2.kind_of? Array
+ if set1.size == 1 and set2.size == 1
+ set1 = set1[0]
+ set2 = set2[0]
+ elsif set1.size == 0 or set2.size == 0
+ nd = set1.size==0 ? set2 : set1
+ rv = nd.collect { |il| compare( il, op, nil ) }
+ return rv
+ else
+ res = []
+ SyncEnumerator.new( set1, set2 ).each { |i1, i2|
+ i1 = norm( i1 )
+ i2 = norm( i2 )
+ res << compare( i1, op, i2 )
+ }
+ return res
+ end
+ end
+ # If one is nodeset and other is number, compare number to each item
+ # in nodeset s.t. number op number(string(item))
+ # If one is nodeset and other is string, compare string to each item
+ # in nodeset s.t. string op string(item)
+ # If one is nodeset and other is boolean, compare boolean to each item
+ # in nodeset s.t. boolean op boolean(item)
+ if set1.kind_of? Array or set2.kind_of? Array
+ if set1.kind_of? Array
+ a = set1
+ b = set2
+ else
+ a = set2
+ b = set1
+ end
+
+ case b
+ when true, false
+ return a.collect {|v| compare( Functions::boolean(v), op, b ) }
+ when Numeric
+ return a.collect {|v| compare( Functions::number(v), op, b )}
+ when /^\d+(\.\d+)?$/
+ b = Functions::number( b )
+ return a.collect {|v| compare( Functions::number(v), op, b )}
+ else
+ b = Functions::string( b )
+ return a.collect { |v| compare( Functions::string(v), op, b ) }
+ end
+ else
+ # If neither is nodeset,
+ # If op is = or !=
+ # If either boolean, convert to boolean
+ # If either number, convert to number
+ # Else, convert to string
+ # Else
+ # Convert both to numbers and compare
+ s1 = set1.to_s
+ s2 = set2.to_s
+ if s1 == 'true' or s1 == 'false' or s2 == 'true' or s2 == 'false'
+ set1 = Functions::boolean( set1 )
+ set2 = Functions::boolean( set2 )
+ else
+ if op == :eq or op == :neq
+ if s1 =~ /^\d+(\.\d+)?$/ or s2 =~ /^\d+(\.\d+)?$/
+ set1 = Functions::number( s1 )
+ set2 = Functions::number( s2 )
+ else
+ set1 = Functions::string( set1 )
+ set2 = Functions::string( set2 )
+ end
+ else
+ set1 = Functions::number( set1 )
+ set2 = Functions::number( set2 )
+ end
+ end
+ return compare( set1, op, set2 )
+ end
+ return false
+ end
+
+ def compare a, op, b
+ case op
+ when :eq
+ a == b
+ when :neq
+ a != b
+ when :lt
+ a < b
+ when :lteq
+ a <= b
+ when :gt
+ a > b
+ when :gteq
+ a >= b
+ when :and
+ a and b
+ when :or
+ a or b
+ else
+ false
+ end
+ end
+ end
+end