# frozen_string_literal: false module URI class RFC3986_Parser # :nodoc: # URI defined in RFC3986 # this regexp is modified not to host is not empty string RFC3986_URI = /\A(?(?[A-Za-z][+\-.0-9A-Za-z]*+):(?\/\/(?(?:(?(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*+)@)?(?(?\[(?:(?(?:\h{1,4}:){6}(?\h{1,4}:\h{1,4}|(?(?[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g\.\g\.\g))|::(?:\h{1,4}:){5}\g|\h{1,4}?::(?:\h{1,4}:){4}\g|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?v\h++\.[!$&-.0-;=A-Z_a-z~]++))\])|\g|(?(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])++))?(?::(?\d*+))?)(?(?:\/(?(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*+))*+)|(?\/(?:(?(?:%\h\h|[!$&-.0-;=@-Z_a-z~])++)(?:\/\g)*+)?)|(?\g(?:\/\g)*+)|(?))(?:\?(?[^#]*+))?(?:\#(?(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*+))?)\z/ RFC3986_relative_ref = /\A(?(?\/\/(?(?:(?(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*+)@)?(?(?\[(?:(?(?:\h{1,4}:){6}(?\h{1,4}:\h{1,4}|(?(?[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g\.\g\.\g))|::(?:\h{1,4}:){5}\g|\h{1,4}?::(?:\h{1,4}:){4}\g|(?:(?:\h{1,4}:){,1}\h{1,4})?::(?:\h{1,4}:){3}\g|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?v\h++\.[!$&-.0-;=A-Z_a-z~]++))\])|\g|(?(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])++))?(?::(?\d*+))?)(?(?:\/(?(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*+))*+)|(?\/(?:(?(?:%\h\h|[!$&-.0-;=@-Z_a-z~])++)(?:\/\g)*+)?)|(?(?(?:%\h\h|[!$&-.0-9;=@-Z_a-z~])++)(?:\/\g)*+)|(?))(?:\?(?[^#]*+))?(?:\#(?(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*+))?)\z/ attr_reader :regexp def initialize @regexp = default_regexp.each_value(&:freeze).freeze end def split(uri) #:nodoc: begin uri = uri.to_str rescue NoMethodError raise InvalidURIError, "bad URI(is not URI?): #{uri.inspect}" end uri.ascii_only? or raise InvalidURIError, "URI must be ascii only #{uri.dump}" if m = RFC3986_URI.match(uri) query = m["query".freeze] scheme = m["scheme".freeze] opaque = m["path-rootless".freeze] if opaque opaque << "?#{query}" if query [ scheme, nil, # userinfo nil, # host nil, # port nil, # registry nil, # path opaque, nil, # query m["fragment".freeze] ] else # normal [ scheme, m["userinfo".freeze], m["host".freeze], m["port".freeze], nil, # registry (m["path-abempty".freeze] || m["path-absolute".freeze] || m["path-empty".freeze]), nil, # opaque query, m["fragment".freeze] ] end elsif m = RFC3986_relative_ref.match(uri) [ nil, # scheme m["userinfo".freeze], m["host".freeze], m["port".freeze], nil, # registry, (m["path-abempty".freeze] || m["path-absolute".freeze] || m["path-noscheme".freeze] || m["path-empty".freeze]), nil, # opaque m["query".freeze], m["fragment".freeze] ] else raise InvalidURIError, "bad URI(is not URI?): #{uri.inspect}" end end def parse(uri) # :nodoc: scheme, userinfo, host, port, registry, path, opaque, query, fragment = self.split(uri) scheme_list = URI.scheme_list if scheme && scheme_list.include?(uc = scheme.upcase) scheme_list[uc].new(scheme, userinfo, host, port, registry, path, opaque, query, fragment, self) else Generic.new(scheme, userinfo, host, port, registry, path, opaque, query, fragment, self) end end def join(*uris) # :nodoc: uris[0] = convert_to_uri(uris[0]) uris.inject :merge end @@to_s = Kernel.instance_method(:to_s) def inspect @@to_s.bind_call(self) end private def default_regexp # :nodoc: { SCHEME: /\A[A-Za-z][A-Za-z0-9+\-.]*\z/, USERINFO: /\A(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*\z/, HOST: /\A(?:(?\[(?:(?(?:\h{1,4}:){6}(?\h{1,4}:\h{1,4}|(?(?[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g\.\g\.\g))|::(?:\h{1,4}:){5}\g|\h{,4}::(?:\h{1,4}:){4}\g|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g|(?(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])*))\z/, ABS_PATH: /\A\/(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*(?:\/(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*)*\z/, REL_PATH: /\A(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+(?:\/(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*)*\z/, QUERY: /\A(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*\z/, FRAGMENT: /\A(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*\z/, OPAQUE: /\A(?:[^\/].*)?\z/, PORT: /\A[\x09\x0a\x0c\x0d ]*\d*[\x09\x0a\x0c\x0d ]*\z/, } end def convert_to_uri(uri) if uri.is_a?(URI::Generic) uri elsif uri = String.try_convert(uri) parse(uri) else raise ArgumentError, "bad argument (expected URI object or URI string)" end end end # class Parser end # module URI