diff options
author | Jari Vetoniemi <jari.vetoniemi@indooratlas.com> | 2020-03-16 18:49:26 +0900 |
---|---|---|
committer | Jari Vetoniemi <jari.vetoniemi@indooratlas.com> | 2020-03-30 00:39:06 +0900 |
commit | fcbf63e62c627deae76c1b8cb8c0876c536ed811 (patch) | |
tree | 64cb17de3f41a2b6fef2368028fbd00349946994 /jni/ruby/lib/rexml/parsers/sax2parser.rb |
Fresh start
Diffstat (limited to 'jni/ruby/lib/rexml/parsers/sax2parser.rb')
-rw-r--r-- | jni/ruby/lib/rexml/parsers/sax2parser.rb | 272 |
1 files changed, 272 insertions, 0 deletions
diff --git a/jni/ruby/lib/rexml/parsers/sax2parser.rb b/jni/ruby/lib/rexml/parsers/sax2parser.rb new file mode 100644 index 0000000..a72c0a7 --- /dev/null +++ b/jni/ruby/lib/rexml/parsers/sax2parser.rb @@ -0,0 +1,272 @@ +require 'rexml/parsers/baseparser' +require 'rexml/parseexception' +require 'rexml/namespace' +require 'rexml/text' + +module REXML + module Parsers + # SAX2Parser + class SAX2Parser + def initialize source + @parser = BaseParser.new(source) + @listeners = [] + @procs = [] + @namespace_stack = [] + @has_listeners = false + @tag_stack = [] + @entities = {} + end + + def source + @parser.source + end + + def add_listener( listener ) + @parser.add_listener( listener ) + end + + # Listen arguments: + # + # Symbol, Array, Block + # Listen to Symbol events on Array elements + # Symbol, Block + # Listen to Symbol events + # Array, Listener + # Listen to all events on Array elements + # Array, Block + # Listen to :start_element events on Array elements + # Listener + # Listen to All events + # + # Symbol can be one of: :start_element, :end_element, + # :start_prefix_mapping, :end_prefix_mapping, :characters, + # :processing_instruction, :doctype, :attlistdecl, :elementdecl, + # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment + # + # There is an additional symbol that can be listened for: :progress. + # This will be called for every event generated, passing in the current + # stream position. + # + # Array contains regular expressions or strings which will be matched + # against fully qualified element names. + # + # Listener must implement the methods in SAX2Listener + # + # Block will be passed the same arguments as a SAX2Listener method would + # be, where the method name is the same as the matched Symbol. + # See the SAX2Listener for more information. + def listen( *args, &blok ) + if args[0].kind_of? Symbol + if args.size == 2 + args[1].each { |match| @procs << [args[0], match, blok] } + else + add( [args[0], nil, blok] ) + end + elsif args[0].kind_of? Array + if args.size == 2 + args[0].each { |match| add( [nil, match, args[1]] ) } + else + args[0].each { |match| add( [ :start_element, match, blok ] ) } + end + else + add([nil, nil, args[0]]) + end + end + + def deafen( listener=nil, &blok ) + if listener + @listeners.delete_if {|item| item[-1] == listener } + @has_listeners = false if @listeners.size == 0 + else + @procs.delete_if {|item| item[-1] == blok } + end + end + + def parse + @procs.each { |sym,match,block| block.call if sym == :start_document } + @listeners.each { |sym,match,block| + block.start_document if sym == :start_document or sym.nil? + } + context = [] + while true + event = @parser.pull + case event[0] + when :end_document + handle( :end_document ) + break + when :start_doctype + handle( :doctype, *event[1..-1]) + when :end_doctype + context = context[1] + when :start_element + @tag_stack.push(event[1]) + # find the observers for namespaces + procs = get_procs( :start_prefix_mapping, event[1] ) + listeners = get_listeners( :start_prefix_mapping, event[1] ) + if procs or listeners + # break out the namespace declarations + # The attributes live in event[2] + event[2].each {|n, v| event[2][n] = @parser.normalize(v)} + nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ } + nsdecl.collect! { |n, value| [ n[6..-1], value ] } + @namespace_stack.push({}) + nsdecl.each do |n,v| + @namespace_stack[-1][n] = v + # notify observers of namespaces + procs.each { |ob| ob.call( n, v ) } if procs + listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners + end + end + event[1] =~ Namespace::NAMESPLIT + prefix = $1 + local = $2 + uri = get_namespace(prefix) + # find the observers for start_element + procs = get_procs( :start_element, event[1] ) + listeners = get_listeners( :start_element, event[1] ) + # notify observers + procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs + listeners.each { |ob| + ob.start_element( uri, local, event[1], event[2] ) + } if listeners + when :end_element + @tag_stack.pop + event[1] =~ Namespace::NAMESPLIT + prefix = $1 + local = $2 + uri = get_namespace(prefix) + # find the observers for start_element + procs = get_procs( :end_element, event[1] ) + listeners = get_listeners( :end_element, event[1] ) + # notify observers + procs.each { |ob| ob.call( uri, local, event[1] ) } if procs + listeners.each { |ob| + ob.end_element( uri, local, event[1] ) + } if listeners + + namespace_mapping = @namespace_stack.pop + # find the observers for namespaces + procs = get_procs( :end_prefix_mapping, event[1] ) + listeners = get_listeners( :end_prefix_mapping, event[1] ) + if procs or listeners + namespace_mapping.each do |ns_prefix, ns_uri| + # notify observers of namespaces + procs.each { |ob| ob.call( ns_prefix ) } if procs + listeners.each { |ob| ob.end_prefix_mapping(ns_prefix) } if listeners + end + end + when :text + #normalized = @parser.normalize( event[1] ) + #handle( :characters, normalized ) + copy = event[1].clone + + esub = proc { |match| + if @entities.has_key?($1) + @entities[$1].gsub(Text::REFERENCE, &esub) + else + match + end + } + + copy.gsub!( Text::REFERENCE, &esub ) + copy.gsub!( Text::NUMERICENTITY ) {|m| + m=$1 + m = "0#{m}" if m[0] == ?x + [Integer(m)].pack('U*') + } + handle( :characters, copy ) + when :entitydecl + handle_entitydecl( event ) + when :processing_instruction, :comment, :attlistdecl, + :elementdecl, :cdata, :notationdecl, :xmldecl + handle( *event ) + end + handle( :progress, @parser.position ) + end + end + + private + def handle( symbol, *arguments ) + tag = @tag_stack[-1] + procs = get_procs( symbol, tag ) + listeners = get_listeners( symbol, tag ) + # notify observers + procs.each { |ob| ob.call( *arguments ) } if procs + listeners.each { |l| + l.send( symbol.to_s, *arguments ) + } if listeners + end + + def handle_entitydecl( event ) + @entities[ event[1] ] = event[2] if event.size == 3 + parameter_reference_p = false + case event[2] + when "SYSTEM" + if event.size == 5 + if event.last == "%" + parameter_reference_p = true + else + event[4, 0] = "NDATA" + end + end + when "PUBLIC" + if event.size == 6 + if event.last == "%" + parameter_reference_p = true + else + event[5, 0] = "NDATA" + end + end + else + parameter_reference_p = (event.size == 4) + end + event[1, 0] = event.pop if parameter_reference_p + handle( event[0], event[1..-1] ) + end + + # The following methods are duplicates, but it is faster than using + # a helper + def get_procs( symbol, name ) + return nil if @procs.size == 0 + @procs.find_all do |sym, match, block| + ( + (sym.nil? or symbol == sym) and + ((name.nil? and match.nil?) or match.nil? or ( + (name == match) or + (match.kind_of? Regexp and name =~ match) + ) + ) + ) + end.collect{|x| x[-1]} + end + def get_listeners( symbol, name ) + return nil if @listeners.size == 0 + @listeners.find_all do |sym, match, block| + ( + (sym.nil? or symbol == sym) and + ((name.nil? and match.nil?) or match.nil? or ( + (name == match) or + (match.kind_of? Regexp and name =~ match) + ) + ) + ) + end.collect{|x| x[-1]} + end + + def add( pair ) + if pair[-1].respond_to? :call + @procs << pair unless @procs.include? pair + else + @listeners << pair unless @listeners.include? pair + @has_listeners = true + end + end + + def get_namespace( prefix ) + uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) || + (@namespace_stack.find { |ns| not ns[nil].nil? }) + uris[-1][prefix] unless uris.nil? or 0 == uris.size + end + end + end +end |