From fcbf63e62c627deae76c1b8cb8c0876c536ed811 Mon Sep 17 00:00:00 2001 From: Jari Vetoniemi Date: Mon, 16 Mar 2020 18:49:26 +0900 Subject: Fresh start --- jni/ruby/lib/uri/rfc3986_parser.rb | 124 +++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 jni/ruby/lib/uri/rfc3986_parser.rb (limited to 'jni/ruby/lib/uri/rfc3986_parser.rb') diff --git a/jni/ruby/lib/uri/rfc3986_parser.rb b/jni/ruby/lib/uri/rfc3986_parser.rb new file mode 100644 index 0000000..3923b06 --- /dev/null +++ b/jni/ruby/lib/uri/rfc3986_parser.rb @@ -0,0 +1,124 @@ +module URI + class RFC3986_Parser # :nodoc: + # URI defined in RFC3986 + # this regexp is modified not to host is not empty string + RFC3986_URI = /\A(?(?[A-Za-z][+\-.0-9A-Za-z]*):(?\/\/(?(?:(?(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?(?\[(?:(?(?:\h{1,4}:){6}(?\h{1,4}:\h{1,4}|(?(?[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g\.\g\.\g))|::(?:\h{1,4}:){5}\g|\h{1,4}?::(?:\h{1,4}:){4}\g|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g|(?(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?\d*))?)(?(?:\/(?(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?\/(?:(?(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g)*)?)|(?\g(?:\/\g)*)|(?))(?:\?(?[^#]*))?(?:\#(?(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/ + RFC3986_relative_ref = /\A(?(?\/\/(?(?:(?(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?(?\[(?(?:\h{1,4}:){6}(?\h{1,4}:\h{1,4}|(?(?[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g\.\g\.\g))|::(?:\h{1,4}:){5}\g|\h{1,4}?::(?:\h{1,4}:){4}\g|(?:(?:\h{1,4}:){,1}\h{1,4})?::(?:\h{1,4}:){3}\g|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?v\h+\.[!$&-.0-;=A-Z_a-z~]+)\])|\g|(?(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?\d*))?)(?(?:\/(?(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?\/(?:(?(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g)*)?)|(?(?(?:%\h\h|[!$&-.0-9;=@-Z_a-z~])+)(?:\/\g)*)|(?))(?:\?(?[^#]*))?(?:\#(?(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/ + attr_reader :regexp + + def initialize + @regexp = default_regexp.each_value(&:freeze).freeze + end + + def split(uri) #:nodoc: + begin + uri = uri.to_str + rescue NoMethodError + raise InvalidURIError, "bad URI(is not URI?): #{uri}" + end + uri.ascii_only? or + raise InvalidURIError, "URI must be ascii only #{uri.dump}" + if m = RFC3986_URI.match(uri) + query = m["query".freeze] + scheme = m["scheme".freeze] + opaque = m["path-rootless".freeze] + if opaque + opaque << "?#{query}" if query + [ scheme, + nil, # userinfo + nil, # host + nil, # port + nil, # registry + nil, # path + opaque, + nil, # query + m["fragment".freeze] + ] + else # normal + [ scheme, + m["userinfo".freeze], + m["host".freeze], + m["port".freeze], + nil, # registry + (m["path-abempty".freeze] || + m["path-absolute".freeze] || + m["path-empty".freeze]), + nil, # opaque + query, + m["fragment".freeze] + ] + end + elsif m = RFC3986_relative_ref.match(uri) + [ nil, # scheme + m["userinfo".freeze], + m["host".freeze], + m["port".freeze], + nil, # registry, + (m["path-abempty".freeze] || + m["path-absolute".freeze] || + m["path-noscheme".freeze] || + m["path-empty".freeze]), + nil, # opaque + m["query".freeze], + m["fragment".freeze] + ] + else + raise InvalidURIError, "bad URI(is not URI?): #{uri}" + end + end + + def parse(uri) # :nodoc: + scheme, userinfo, host, port, + registry, path, opaque, query, fragment = self.split(uri) + scheme_list = URI.scheme_list + if scheme && scheme_list.include?(uc = scheme.upcase) + scheme_list[uc].new(scheme, userinfo, host, port, + registry, path, opaque, query, + fragment, self) + else + Generic.new(scheme, userinfo, host, port, + registry, path, opaque, query, + fragment, self) + end + end + + + def join(*uris) # :nodoc: + uris[0] = convert_to_uri(uris[0]) + uris.inject :merge + end + + @@to_s = Kernel.instance_method(:to_s) + def inspect + @@to_s.bind(self).call + end + + private + + def default_regexp # :nodoc: + { + SCHEME: /\A[A-Za-z][A-Za-z0-9+\-.]*\z/, + USERINFO: /\A(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*\z/, + HOST: /\A(?:(?\[(?:(?(?:\h{1,4}:){6}(?\h{1,4}:\h{1,4}|(?(?[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g\.\g\.\g))|::(?:\h{1,4}:){5}\g|\h{,4}::(?:\h{1,4}:){4}\g|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g|(?(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])*))\z/, + ABS_PATH: /\A\/(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*(?:\/(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*)*\z/, + REL_PATH: /\A(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+(?:\/(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*)*\z/, + QUERY: /\A(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*\z/, + FRAGMENT: /\A(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*\z/, + OPAQUE: /\A(?:[^\/].*)?\z/, + PORT: /\A[\x09\x0a\x0c\x0d ]*\d*[\x09\x0a\x0c\x0d ]*\z/, + } + end + + def convert_to_uri(uri) + if uri.is_a?(URI::Generic) + uri + elsif uri = String.try_convert(uri) + parse(uri) + else + raise ArgumentError, + "bad argument (expected URI object or URI string)" + end + end + + end # class Parser +end # module URI -- cgit v1.2.3