7f? dZddlZddlZddlZddlZddlZddlZddlZddl Z ddl Z ddl m Z ddlmZddlmZddlmZmZmZmZmZmZmZmZmZmZmZddlmZddl m!Z!dd l"m#Z#m$Z$dd l%m&Z&dd l'm(Z(dd l)m*Z*dd l+m,Z,ddl-m.Z.ddl/m0Z0ddl1m2Z2ddl3m4Z4ddl5m6Z6m7Z7m8Z8ej9e:Z;eeGdde?Z@de!ddfdZAGdde?ZBdezO The main purpose of this module is to expose LinkCollector.collect_sources(). N) dataclass) HTMLParser)Values) CallableDictIterableListMutableMapping NamedTupleOptionalProtocolSequenceTupleUnion)requests)Response) RetryErrorSSLError)NetworkConnectionError)Link) SearchScope) PipSession)raise_for_status)is_archive_fileredact_auth_from_url)vcs)CandidatesFromPage LinkSource build_sourceurlreturnctjD]D}||r|t |dvr|cSEdS)zgLook for VCS schemes in the URL. Returns the matched VCS scheme, or None if there's no match. z+:N)rschemeslower startswithlen)r"schemes }/builddir/build/BUILD/imunify360-venv-2.3.5/opt/imunify360/venv/lib/python3.11/site-packages/pip/_internal/index/collector.py_match_vcs_schemer+3sW + 99;; ! !& ) ) c#f++.>$.F.FMMM 4c,eZdZdededdffd ZxZS)_NotAPIContent content_type request_descr#Ncht||||_||_dSN)super__init__r/r0)selfr/r0 __class__s r*r4z_NotAPIContent.__init__?s3 |444((r,)__name__ __module__ __qualname__strr4 __classcell__r6s@r*r.r.>sR)S))))))))))))r,r.responsec|jdd}|}|drdSt ||jj)z Check the Content-Type header to ensure the response contains a Simple API Response. Raises `_NotAPIContent` if the content type is not a valid content-type. Content-TypeUnknown)z text/htmlz#application/vnd.pypi.simple.v1+html#application/vnd.pypi.simple.v1+jsonN)headersgetr&r'r.requestmethod)r=r/content_type_ls r*_ensure_api_headerrGEsg#'' BBL!''))N     x'7'> ? ??r,ceZdZdS)_NotHTTPN)r7r8r9r,r*rIrI[sDr,rIsessionctj|\}}}}}|dvrt||d}t |t |dS)z Send a HEAD request to the URL, and ensure the response contains a simple API Response. Raises `_NotHTTP` if the URL is not available for a HEAD request, or `_NotAPIContent` if the content type is not a valid content type. >httphttpsT)allow_redirectsN)urllibparseurlsplitrIheadrrG)r"rKr)netlocpathqueryfragmentresps r*_ensure_api_responserY_sq-3L,A,A#,F,F)FFD% &&&jj <<T< 2 2DTtr,ctt|jrt||tdt |||dgddd}t|t|tdt ||j d d |S) aYAccess an Simple API response with GET, and return the response. This consists of three parts: 1. If the URL looks suspiciously like an archive, send a HEAD first to check the Content-Type is HTML or Simple API, to avoid downloading a large file. Raise `_NotHTTP` if the content type cannot be determined, or `_NotAPIContent` if it is not HTML or a Simple API. 2. Actually perform the request. Raise HTTP exceptions on network failures. 3. Check the Content-Type header to make sure we got a Simple API response, and raise `_NotAPIContent` otherwise. rKzGetting page %sz, )rAz*application/vnd.pypi.simple.v1+html; q=0.1ztext/html; q=0.01z max-age=0)Acceptz Cache-Control)rBzFetched page %s as %sr?r@) rrfilenamerYloggerdebugrrCjoinrrGrB)r"rKrXs r*_get_simple_responseraqstCyy)**3S'2222 LL"$8$=$=>>> ;; ii()+     D4Tt LLS!! 33 Kr,rBc|rSd|vrOtj}|d|d<|d}|rt |SdS)z=Determine if we have any encoding information in our headers.r?z content-typecharsetN)emailmessageMessage get_paramr:)rBmrcs r*_get_encoding_from_headersris` >W,, M ! ! # ##N3.++i((  w<<  4r,c2eZdZd dZdedefdZdefdZdS) CacheablePageContentpage IndexContentr#Nc&|jsJ||_dSr2)cache_link_parsingrlr5rls r*r4zCacheablePageContent.__init__s&&&& r,othercpt|t|o|jj|jjkSr2) isinstancetyperlr")r5rqs r*__eq__zCacheablePageContent.__eq__s*%d,,P%*.1PPr,c4t|jjSr2)hashrlr"r5s r*__hash__zCacheablePageContent.__hash__sDIM"""r,)rlrmr#N) r7r8r9r4objectboolruintryrJr,r*rkrkskQFQtQQQQ########r,rkc*eZdZdddeefdZdS) ParseLinksrlrmr#cdSr2rJrps r*__call__zParseLinks.__call__sr,N)r7r8r9rrrrJr,r*r~r~s*C^CCCCCCCr,r~fnctjddtdttffd tjdddttffd }|S) z Given a function that parses an Iterable[Link] from an IndexContent, cache the function's result (keyed by CacheablePageContent), unless the IndexContent `page` has `page.cache_link_parsing == False`. N)maxsizecacheable_pager#c>t|jSr2)listrl)rrs r*wrapperz*with_cached_index_content..wrappersBB~*++,,,r,rlrmcr|jrt|St|Sr2)rorkr)rlrrs r*wrapper_wrapperz2with_cached_index_content..wrapper_wrappers;  " 77/5566 6BBtHH~~r,) functools lru_cacherkr rwraps)rrrs` @r*with_cached_index_contentrs&&&- 4-d-----'&-_Rnd r,rlrmc#K|j}|drUtj|j}|dgD]#}tj||j }||V$dSt|j }|j pd}| |j ||j }|jp|}|jD] } tj| ||}||V!dS)z\ Parse a Simple API's Index Content, and yield its anchor elements as Link objects. rAfilesNzutf-8)page_urlbase_url)r/r&r'jsonloadscontentrCr from_jsonr"HTMLLinkParserencodingfeeddecoderanchors from_element) rlrFdatafilelinkparserrr"ranchors r* parse_linksrs! &,,..N  !FGGz$,''HHWb))  D>$11D|JJJJ DH % %F}'H KK ##H--... (C%#H. #III <  r,T)frozencbeZdZUdZeed<eed<eeed<eed<dZe ed<defd Z d S) rmaRepresents one response (or page), along with its URL. :param encoding: the encoding to decode the given content. :param url: the URL from which the HTML was downloaded. :param cache_link_parsing: whether links parsed from this page's url should be cached. PyPI index urls should have this set to False, for example. rr/rr"Tror#c*t|jSr2)rr"rxs r*__str__zIndexContent.__str__ s#DH---r,N) r7r8r9__doc__bytes__annotations__r:r ror{rrJr,r*rmrms|NNNsm HHH####.......r,ceZdZdZdeddffd ZdedeeeeefddfdZ deeeeefdeefd Z xZ S) rzf HTMLParser that keeps the first base HREF and a list of all anchor elements' attributes. r"r#Ncvtd||_d|_g|_dS)NT)convert_charrefs)r3r4r"rr)r5r"r6s r*r4zHTMLLinkParser.__init__s7 $///'+ 79 r,tagattrsc|dkr)|j"||}| ||_dSdS|dkr)|jt |dSdS)Nbasea)rget_hrefrappenddict)r5rrhrefs r*handle_starttagzHTMLLinkParser.handle_starttagsn &==T]2==''D $   CZZ L  U , , , , ,Zr,c*|D]\}}|dkr|cSdS)NrrJ)r5rnamevalues r*rzHTMLLinkParser.get_href&s1   KD%v~~ tr,) r7r8r9rr:r4r rr rrr;r<s@r*rrs :C:D::::::-3-tE#x}:L4M/N-SW----d5hsm);#<=(3-r,rrreasonmeth).Nc<| tj}|d||dS)Nz%Could not fetch URL %s: %s - skipping)r^r_)rrrs r*_handle_get_simple_failr-s+  ||D 0$?????r,roc|t|j}t|j|jd||j|S)Nr?)rr"ro)rirBrmrr")r=rors r*_make_index_contentr7sG*(*:;;H ( L-    r,c|jddd}t|}|rtd||dSt j|\}}}}}}|dkrtj t j |rU| ds|dz }t j|d}td| t!|| }t#||j S#t&$rtd |Yn t($r1}td ||j|jYd}~nd}~wt.$r}t1||Yd}~nd}~wt2$r}t1||Yd}~nd}~wt4$r:}d } | t7|z } t1|| tjYd}~nRd}~wt:j$r}t1|d|Yd}~n(d}~wt:j$rt1|dYnwxYwdS)N#rrzICannot look at %s URL %s because it does not support lookup as web pages.r/z index.htmlz# file: URL is directory, getting %sr[)roz`Skipping page %s because it looks like an archive, and cannot be checked by a HTTP HEAD request.zSkipping page %s because the %s request got Content-Type: %s. The only supported Content-Types are application/vnd.pypi.simple.v1+json, application/vnd.pypi.simple.v1+html, and text/htmlz4There was a problem confirming the ssl certificate: )rzconnection error: z timed out) r"splitr+r^warningrPrQurlparseosrUisdirrD url2pathnameendswithurljoinr_rarrorIr.r0r/rrrrr:inforConnectionErrorTimeout) rrKr" vcs_schemer)_rUrXexcrs r*_get_index_contentrDs (..a  #C#3''J W     t &|44S99FAtQ1 BGMM&.*E*Ed*K*KLL||C    3JC l""3 55 :C@@@U#C999:#4D*-....*-......r,rc eZdZdZdededdfdZe ddeded e ddfd Z e de e fd Zd edeefd Zde dedefdZdS) LinkCollectorz Responsible for collecting Link objects from all configured locations, making network requests as needed. The class's main method is its collect_sources() method. rK search_scoper#Nc"||_||_dSr2)rrK)r5rKrs r*r4zLinkCollector.__init__s ) r,Foptionssuppress_no_indexc|jg|jz}|jr<|s:tddd|Dg}|jpg}tj|||j}t||}|S)z :param session: The Session to use to make requests. :param suppress_no_index: Whether to ignore the --no-index option when constructing the SearchScope object. zIgnoring indexes: %s,c34K|]}t|VdSr2r).0r"s r* z'LinkCollector.create..s+IIs-c22IIIIIIr,)rrno_index)rKr) index_urlextra_index_urlsrr^r_r`rrcreater)clsrKrrrrrlink_collectors r*rzLinkCollector.creates'(7+CC   $5  LL&IIjIIIII   J'-2 ")!!%   '%   r,c|jjSr2)rrrxs r*rzLinkCollector.find_linkss ++r,locationc.t||jS)z> Fetch an HTML page containing package links. r[)rrK)r5rs r*fetch_responsezLinkCollector.fetch_responses"(DLAAAAr, project_namecandidates_from_pagecztjfdjD}tjfdjD}t tj redtj ||D}t|ddg|z}t d|tt!|t!|S)Nc 3VK|]#}t|jjddV$dS)Frpage_validator expand_dirrorNr!rKis_secure_originrlocrrr5s r*rz0LinkCollector.collect_sources..s_ 4 4  %9#|< #()     4 4 4 4 4 4 r,c 3VK|]#}t|jjddV$dS)TrNrrs r*rz0LinkCollector.collect_sources..s_ 5 5  %9#|<#')     5 5 5 5 5 5 r,c4g|]}||j d|jS)Nz* )r)rss r* z1LinkCollector.collect_sources..s6=QV%7QV %7%7%7r,z' location(s) to search for versions of : )rr) collections OrderedDictrget_index_urls_locationsvaluesrr^ isEnabledForloggingDEBUG itertoolschainr(r_r`rr)r5rrindex_url_sourcesfind_links_sourcesliness``` r*collect_sourceszLinkCollector.collect_sourcess (3 4 4 4 4 4 4 (AA,OO 4 4 4    &(( )4 5 5 5 5 5 5  5 5 5    &((    w} - - +");=NOOE u::33#/333E LL5)) * * *.//-..    r,)F)r7r8r9rrrr4 classmethodrr{rpropertyr r:rrr rmrrrrrJr,r*rrs'"   #(    [B,DI,,,X,BtB0FBBBB , , 1,   , , , , , , r,rr2)T)Rrr email.messagerdrrrrr urllib.parserPurllib.request dataclassesr html.parserroptparsertypingrrrr r r r r rrr pip._vendorrpip._vendor.requestsrpip._vendor.requests.exceptionsrrpip._internal.exceptionsrpip._internal.models.linkr!pip._internal.models.search_scoperpip._internal.network.sessionrpip._internal.network.utilsrpip._internal.utils.filetypesrpip._internal.utils.miscrpip._internal.vcsrsourcesrr r! getLoggerr7r^r:ResponseHeadersr+ Exceptionr.rGrIrYrarirkr~rrrmrrr{rrrrrJr,r*r s  !!!!!!""""""                          ! ))))))@@@@@@@@;;;;;;******999999444444888888999999999999!!!!!!AAAAAAAAAA  8 $ $ c*38C=)))))Y)))@@d@@@@,     y   cJ4$<c<J<8<<<<~HSM # # # # # # # #DDDDDDDD*(n$8 $........(Z>+/@@ @ #y. !@ 8I& '@ @@@@48   ,0     :T:z:h~>V::::z/////z/// h h h h h h h h h h r,