U 巀g?@sdZddlZddlZddlZddlZddlZddlZddlZddl Z ddl Z ddl m Z ddlmZddlmZddlmZmZmZmZmZmZmZmZmZmZmZddlmZddl m!Z!dd l"m#Z#m$Z$dd l%m&Z&dd l'm(Z(dd l)m*Z*dd l+m,Z,ddl-m.Z.ddl/m0Z0ddl1m2Z2ddl3m4Z4ddl5m6Z6m7Z7m8Z8e9e:Z;eeGddde?Z@e!ddddZAGddde?ZBed?ZOGd@dAdAeZPGdBdCdCZQdS)FzO The main purpose of this module is to expose LinkCollector.collect_sources(). N) dataclass) HTMLParser)Values) CallableDictIterableListMutableMapping NamedTupleOptionalProtocolSequenceTupleUnion)requests)Response) RetryErrorSSLError)NetworkConnectionError)Link) SearchScope) PipSession)raise_for_status)is_archive_fileredact_auth_from_url)vcs)CandidatesFromPage LinkSource build_sourceurlreturncCs6tjD]*}||r|t|dkr|SqdS)zgLook for VCS schemes in the URL. Returns the matched VCS scheme, or None if there's no match. z+:N)rschemeslower startswithlen)r"schemer)K/opt/hc_python/lib/python3.8/site-packages/pip/_internal/index/collector.py_match_vcs_scheme3s  r+cs&eZdZeeddfdd ZZS)_NotAPIContentN) content_type request_descr#cst||||_||_dSN)super__init__r-r.)selfr-r. __class__r)r*r1?sz_NotAPIContent.__init__)__name__ __module__ __qualname__strr1 __classcell__r)r)r3r*r,>sr,)responser#cCs6|jdd}|}|dr$dSt||jjdS)z Check the Content-Type header to ensure the response contains a Simple API Response. Raises `_NotAPIContent` if the content type is not a valid content-type. Content-TypeUnknown)z text/htmlz#application/vnd.pypi.simple.v1+html#application/vnd.pypi.simple.v1+jsonN)headersgetr%r&r,requestmethod)r:r-content_type_lr)r)r*_ensure_api_headerEsrCc@s eZdZdS)_NotHTTPN)r5r6r7r)r)r)r*rD[srD)r"sessionr#cCsFtj|\}}}}}|dkr$t|j|dd}t|t|dS)z Send a HEAD request to the URL, and ensure the response contains a simple API Response. Raises `_NotHTTP` if the URL is not available for a HEAD request, or `_NotAPIContent` if the content type is not a valid content type. >httpshttpT)allow_redirectsN)urllibparseurlsplitrDheadrrC)r"rEr(netlocpathqueryfragmentrespr)r)r*_ensure_api_response_s rRcCsztt|jrt||dtdt||j|ddddgddd }t |t |td t||j d d |S) aYAccess an Simple API response with GET, and return the response. This consists of three parts: 1. If the URL looks suspiciously like an archive, send a HEAD first to check the Content-Type is HTML or Simple API, to avoid downloading a large file. Raise `_NotHTTP` if the content type cannot be determined, or `_NotAPIContent` if it is not HTML or a Simple API. 2. Actually perform the request. Raise HTTP exceptions on network failures. 3. Check the Content-Type header to make sure we got a Simple API response, and raise `_NotAPIContent` otherwise. rEzGetting page %sz, r=z*application/vnd.pypi.simple.v1+html; q=0.1ztext/html; q=0.01z max-age=0)Acceptz Cache-Control)r>zFetched page %s as %sr;r<) rrfilenamerRloggerdebugrr?joinrrCr>)r"rErQr)r)r*_get_simple_responseqs,   rY)r>r#cCs<|r8d|kr8tj}|d|d<|d}|r8t|SdS)z=Determine if we have any encoding information in our headers.r;z content-typecharsetN)emailmessageMessage get_paramr8)r>mrZr)r)r*_get_encoding_from_headerss    r`c@s:eZdZdddddZeedddZed d d ZdS) CacheablePageContent IndexContentNpager#cCs|js t||_dSr/)cache_link_parsingAssertionErrorrdr2rdr)r)r*r1s zCacheablePageContent.__init__)otherr#cCst|t|o|jj|jjkSr/) isinstancetyperdr")r2rhr)r)r*__eq__szCacheablePageContent.__eq__r#cCs t|jjSr/)hashrdr"r2r)r)r*__hash__szCacheablePageContent.__hash__) r5r6r7r1objectboolrkintror)r)r)r*rasrac@s eZdZdeedddZdS) ParseLinksrbrccCsdSr/r)rgr)r)r*__call__zParseLinks.__call__N)r5r6r7rrrtr)r)r)r*rssrs)fnr#csLtjddtttdfdd tdttdfdd }|S) z Given a function that parses an Iterable[Link] from an IndexContent, cache the function's result (keyed by CacheablePageContent), unless the IndexContent `page` has `page.cache_link_parsing == False`. N)maxsize)cacheable_pager#cst|jSr/)listrd)rx)rvr)r*wrappersz*with_cached_index_content..wrapperrbrccs|jrt|St|Sr/)rerary)rdrvrzr)r*wrapper_wrappers z2with_cached_index_content..wrapper_wrapper) functools lru_cacherarrwraps)rvr|r)r{r*with_cached_index_contents  rrbrcc cs|j}|drTt|j}|dgD]"}t||j }|dkrHq,|Vq,dSt |j }|j pfd}| |j ||j }|jp|}|jD]$} tj| ||d}|dkrq|VqdS)z\ Parse a Simple API's Index Content, and yield its anchor elements as Link objects. r=filesNzutf-8)page_urlbase_url)r-r%r&jsonloadscontentr?r from_jsonr"HTMLLinkParserencodingfeeddecoderanchors from_element) rdrBdatafilelinkparserrr"ranchorr)r)r* parse_linkss&       rT)frozenc@sPeZdZUdZeed<eed<eeed<eed<dZe ed<edd d Z d S) rbaRepresents one response (or page), along with its URL. :param encoding: the encoding to decode the given content. :param url: the URL from which the HTML was downloaded. :param cache_link_parsing: whether links parsed from this page's url should be cached. PyPI index urls should have this set to False, for example. rr-rr"TrerlcCs t|jSr/)rr"rnr)r)r*__str__ szIndexContent.__str__N) r5r6r7__doc__bytes__annotations__r8r rerqrr)r)r)r*rbs    csneZdZdZeddfdd ZeeeeeefddddZ eeeeefeed d d Z Z S) rzf HTMLParser that keeps the first base HREF and a list of all anchor elements' attributes. Nr!cs$tjdd||_d|_g|_dS)NT)convert_charrefs)r0r1r"rr)r2r"r3r)r*r1szHTMLLinkParser.__init__)tagattrsr#cCsH|dkr,|jdkr,||}|dk rD||_n|dkrD|jt|dS)Nbasea)rget_hrefrappenddict)r2rrhrefr)r)r*handle_starttags  zHTMLLinkParser.handle_starttag)rr#cCs"|D]\}}|dkr|SqdS)Nrr))r2rnamevaluer)r)r*r&s  zHTMLLinkParser.get_href) r5r6r7rr8r1rrr rrr9r)r)r3r*rs"r).N)rreasonmethr#cCs|dkrtj}|d||dS)Nz%Could not fetch URL %s: %s - skipping)rVrW)rrrr)r)r*_handle_get_simple_fail-sr)r:rer#cCs&t|j}t|j|jd||j|dS)Nr;)rr"re)r`r>rbrr")r:rerr)r)r*_make_index_content7s r)rrEr#c Cs|jddd}t|}|r0td||dStj|\}}}}}}|dkrtj tj |r| dsv|d7}tj|d}td|zt||d }WnDtk rtd |Yn2tk r}ztd ||j|jW5d}~XYntk r$}zt||W5d}~XYntk rP}zt||W5d}~XYntk r}z$d } | t|7} t|| tjd W5d}~XYndtjk r}zt|d|W5d}~XYn0tjk rt|dYnXt||jdSdS)N#rrzICannot look at %s URL %s because it does not support lookup as web pages.r/z index.htmlz# file: URL is directory, getting %srSz`Skipping page %s because it looks like an archive, and cannot be checked by a HTTP HEAD request.zSkipping page %s because the %s request got Content-Type: %s. The only supported Content-Types are application/vnd.pypi.simple.v1+json, application/vnd.pypi.simple.v1+html, and text/htmlz4There was a problem confirming the ssl certificate: )rzconnection error: z timed out)re) r"splitr+rVwarningrIrJurlparseosrNisdirr@ url2pathnameendswithurljoinrWrYrDr,r.r-rrrrr8inforConnectionErrorTimeoutrre) rrEr" vcs_schemer(_rNrQexcrr)r)r*_get_index_contentDsV      rc@s.eZdZUeeeed<eeeed<dS)CollectedSources find_links index_urlsN)r5r6r7r r rrr)r)r)r*rs rc@sxeZdZdZeeddddZedeee dddd Z e e e d d d Zeeed ddZe eedddZdS) LinkCollectorz Responsible for collecting Link objects from all configured locations, making network requests as needed. The class's main method is its collect_sources() method. N)rE search_scoper#cCs||_||_dSr/)rrE)r2rErr)r)r*r1szLinkCollector.__init__F)rEoptionssuppress_no_indexr#cCsd|jg|j}|jr8|s8tdddd|Dg}|jp@g}tj|||jd}t ||d}|S)z :param session: The Session to use to make requests. :param suppress_no_index: Whether to ignore the --no-index option when constructing the SearchScope object. zIgnoring indexes: %s,css|]}t|VqdSr/r).0r"r)r)r* sz'LinkCollector.create..)rrno_index)rEr) index_urlextra_index_urlsrrVrWrXrrcreater)clsrErrrrrlink_collectorr)r)r*rs$   zLinkCollector.createrlcCs|jjSr/)rrrnr)r)r*rszLinkCollector.find_links)locationr#cCst||jdS)z> Fetch an HTML page containing package links. rS)rrE)r2rr)r)r*fetch_responseszLinkCollector.fetch_response) project_namecandidates_from_pager#cstfddjD}tfddjD}ttj rddt ||D}t |ddg|}t d|tt|t|d S) Nc 3s&|]}t|jjdddVqdS)Frpage_validator expand_dirrerNr rEis_secure_originrlocrrr2r)r*rs z0LinkCollector.collect_sources..c 3s&|]}t|jjdddVqdS)TrNrrrr)r*rs cSs*g|]"}|dk r|jdk rd|jqS)Nz* )r)rsr)r)r* s z1LinkCollector.collect_sources..z' location(s) to search for versions of : )rr) collections OrderedDictrget_index_urls_locationsvaluesrrV isEnabledForloggingDEBUG itertoolschainr'rWrXrry)r2rrindex_url_sourcesfind_links_sourceslinesr)rr*collect_sourcess&    zLinkCollector.collect_sources)F)r5r6r7rrrr1 classmethodrrqrpropertyrr8rrr rbrrrrr)r)r)r*rs(  !r)N)T)Rrr email.messager[r}rrrr urllib.parserIurllib.request dataclassesr html.parserroptparsertypingrrrrr r r r r rr pip._vendorrZpip._vendor.requestsrZpip._vendor.requests.exceptionsrrpip._internal.exceptionsrpip._internal.models.linkr!pip._internal.models.search_scoperpip._internal.network.sessionrpip._internal.network.utilsrpip._internal.utils.filetypesrpip._internal.utils.miscrpip._internal.vcsrsourcesrrr getLoggerr5rVr8ResponseHeadersr+ Exceptionr,rCrDrRrYr`rarsrrrbrrrqrrrrr)r)r)r*st   4             ?      =