diff options
| author | Jari Vetoniemi <jari.vetoniemi@indooratlas.com> | 2020-03-16 18:49:26 +0900 | 
|---|---|---|
| committer | Jari Vetoniemi <jari.vetoniemi@indooratlas.com> | 2020-03-30 00:39:06 +0900 | 
| commit | fcbf63e62c627deae76c1b8cb8c0876c536ed811 (patch) | |
| tree | 64cb17de3f41a2b6fef2368028fbd00349946994 /jni/ruby/lib/rexml/source.rb | |
Fresh start
Diffstat (limited to 'jni/ruby/lib/rexml/source.rb')
| -rw-r--r-- | jni/ruby/lib/rexml/source.rb | 296 | 
1 files changed, 296 insertions, 0 deletions
| diff --git a/jni/ruby/lib/rexml/source.rb b/jni/ruby/lib/rexml/source.rb new file mode 100644 index 0000000..cfafdbd --- /dev/null +++ b/jni/ruby/lib/rexml/source.rb @@ -0,0 +1,296 @@ +# coding: US-ASCII +require 'rexml/encoding' + +module REXML +  # Generates Source-s.  USE THIS CLASS. +  class SourceFactory +    # Generates a Source object +    # @param arg Either a String, or an IO +    # @return a Source, or nil if a bad argument was given +    def SourceFactory::create_from(arg) +      if arg.respond_to? :read and +          arg.respond_to? :readline and +          arg.respond_to? :nil? and +          arg.respond_to? :eof? +        IOSource.new(arg) +      elsif arg.respond_to? :to_str +        require 'stringio' +        IOSource.new(StringIO.new(arg)) +      elsif arg.kind_of? Source +        arg +      else +        raise "#{arg.class} is not a valid input stream.  It must walk \n"+ +          "like either a String, an IO, or a Source." +      end +    end +  end + +  # A Source can be searched for patterns, and wraps buffers and other +  # objects and provides consumption of text +  class Source +    include Encoding +    # The current buffer (what we're going to read next) +    attr_reader :buffer +    # The line number of the last consumed text +    attr_reader :line +    attr_reader :encoding + +    # Constructor +    # @param arg must be a String, and should be a valid XML document +    # @param encoding if non-null, sets the encoding of the source to this +    # value, overriding all encoding detection +    def initialize(arg, encoding=nil) +      @orig = @buffer = arg +      if encoding +        self.encoding = encoding +      else +        detect_encoding +      end +      @line = 0 +    end + + +    # Inherited from Encoding +    # Overridden to support optimized en/decoding +    def encoding=(enc) +      return unless super +      encoding_updated +    end + +    # Scans the source for a given pattern.  Note, that this is not your +    # usual scan() method.  For one thing, the pattern argument has some +    # requirements; for another, the source can be consumed.  You can easily +    # confuse this method.  Originally, the patterns were easier +    # to construct and this method more robust, because this method +    # generated search regexps on the fly; however, this was +    # computationally expensive and slowed down the entire REXML package +    # considerably, since this is by far the most commonly called method. +    # @param pattern must be a Regexp, and must be in the form of +    # /^\s*(#{your pattern, with no groups})(.*)/.  The first group +    # will be returned; the second group is used if the consume flag is +    # set. +    # @param consume if true, the pattern returned will be consumed, leaving +    # everything after it in the Source. +    # @return the pattern, if found, or nil if the Source is empty or the +    # pattern is not found. +    def scan(pattern, cons=false) +      return nil if @buffer.nil? +      rv = @buffer.scan(pattern) +      @buffer = $' if cons and rv.size>0 +      rv +    end + +    def read +    end + +    def consume( pattern ) +      @buffer = $' if pattern.match( @buffer ) +    end + +    def match_to( char, pattern ) +      return pattern.match(@buffer) +    end + +    def match_to_consume( char, pattern ) +      md = pattern.match(@buffer) +      @buffer = $' +      return md +    end + +    def match(pattern, cons=false) +      md = pattern.match(@buffer) +      @buffer = $' if cons and md +      return md +    end + +    # @return true if the Source is exhausted +    def empty? +      @buffer == "" +    end + +    def position +      @orig.index( @buffer ) +    end + +    # @return the current line in the source +    def current_line +      lines = @orig.split +      res = lines.grep @buffer[0..30] +      res = res[-1] if res.kind_of? Array +      lines.index( res ) if res +    end + +    private +    def detect_encoding +      buffer_encoding = @buffer.encoding +      detected_encoding = "UTF-8" +      begin +        @buffer.force_encoding("ASCII-8BIT") +        if @buffer[0, 2] == "\xfe\xff" +          @buffer[0, 2] = "" +          detected_encoding = "UTF-16BE" +        elsif @buffer[0, 2] == "\xff\xfe" +          @buffer[0, 2] = "" +          detected_encoding = "UTF-16LE" +        elsif @buffer[0, 3] == "\xef\xbb\xbf" +          @buffer[0, 3] = "" +          detected_encoding = "UTF-8" +        end +      ensure +        @buffer.force_encoding(buffer_encoding) +      end +      self.encoding = detected_encoding +    end + +    def encoding_updated +      if @encoding != 'UTF-8' +        @buffer = decode(@buffer) +        @to_utf = true +      else +        @to_utf = false +        @buffer.force_encoding ::Encoding::UTF_8 +      end +    end +  end + +  # A Source that wraps an IO.  See the Source class for method +  # documentation +  class IOSource < Source +    #attr_reader :block_size + +    # block_size has been deprecated +    def initialize(arg, block_size=500, encoding=nil) +      @er_source = @source = arg +      @to_utf = false +      @pending_buffer = nil + +      if encoding +        super("", encoding) +      else +        super(@source.read(3) || "") +      end + +      if !@to_utf and +          @buffer.respond_to?(:force_encoding) and +          @source.respond_to?(:external_encoding) and +          @source.external_encoding != ::Encoding::UTF_8 +        @force_utf8 = true +      else +        @force_utf8 = false +      end +    end + +    def scan(pattern, cons=false) +      rv = super +      # You'll notice that this next section is very similar to the same +      # section in match(), but just a liiittle different.  This is +      # because it is a touch faster to do it this way with scan() +      # than the way match() does it; enough faster to warrent duplicating +      # some code +      if rv.size == 0 +        until @buffer =~ pattern or @source.nil? +          begin +            @buffer << readline +          rescue Iconv::IllegalSequence +            raise +          rescue +            @source = nil +          end +        end +        rv = super +      end +      rv.taint +      rv +    end + +    def read +      begin +        @buffer << readline +      rescue Exception, NameError +        @source = nil +      end +    end + +    def consume( pattern ) +      match( pattern, true ) +    end + +    def match( pattern, cons=false ) +      rv = pattern.match(@buffer) +      @buffer = $' if cons and rv +      while !rv and @source +        begin +          @buffer << readline +          rv = pattern.match(@buffer) +          @buffer = $' if cons and rv +        rescue +          @source = nil +        end +      end +      rv.taint +      rv +    end + +    def empty? +      super and ( @source.nil? || @source.eof? ) +    end + +    def position +      @er_source.pos rescue 0 +    end + +    # @return the current line in the source +    def current_line +      begin +        pos = @er_source.pos        # The byte position in the source +        lineno = @er_source.lineno  # The XML < position in the source +        @er_source.rewind +        line = 0                    # The \r\n position in the source +        begin +          while @er_source.pos < pos +            @er_source.readline +            line += 1 +          end +        rescue +        end +      rescue IOError +        pos = -1 +        line = -1 +      end +      [pos, lineno, line] +    end + +    private +    def readline +      str = @source.readline(@line_break) +      if @pending_buffer +        if str.nil? +          str = @pending_buffer +        else +          str = @pending_buffer + str +        end +        @pending_buffer = nil +      end +      return nil if str.nil? + +      if @to_utf +        decode(str) +      else +        str.force_encoding(::Encoding::UTF_8) if @force_utf8 +        str +      end +    end + +    def encoding_updated +      case @encoding +      when "UTF-16BE", "UTF-16LE" +        @source.binmode +        @source.set_encoding(@encoding, @encoding) +      end +      @line_break = encode(">") +      @pending_buffer, @buffer = @buffer, "" +      @pending_buffer.force_encoding(@encoding) +      super +    end +  end +end | 
