jf3ddlZddlmZddlmZddlmZddlmZddl m Z ddl m Z m Z mZmZmZmZmZdd lmZmZdd lmZdd lmZmZmZGd d ZGddZeeefZ ee Z!GddZ"dS)N)Counter)aliases)sha256)dumps)sub)AnyDictIteratorListOptionalTupleUnion)NOT_PRINTABLE_PATTERNTOO_BIG_SEQUENCE) mess_ratio) iana_nameis_multi_byte_encoding unicode_rangec eZdZ d*dededededddeef d Zd e d efd Z d e d efd Z e d efdZ e d efdZe d efdZe d efdZd efdZd efdZd+dZe d efdZe d eefdZe d efdZe d efdZe d eefdZe d efdZe d efdZe d efdZe d efdZe d efdZe d efdZ e d edfd Z!e d efd!Z"e d eefd"Z#e d eefd#Z$d,d$Z%d,d%Z&d-d'ed efd(Z'e d efd)Z(dS). CharsetMatchNpayloadguessed_encodingmean_mess_ratiohas_sig_or_bom languagesCoherenceMatchesdecoded_payloadc||_||_||_||_||_d|_g|_d|_d|_d|_ ||_ dS)N) _payload _encoding_mean_mess_ratio _languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string)selfrrrrrrs y/builddir/build/BUILD/imunify360-venv-2.3.5/opt/imunify360/venv/lib/python3.11/site-packages/charset_normalizer/models.py__init__zCharsetMatch.__init__s[  ) /#-# %("# $& otherreturnct|tsGtdt |jt |j|j|jko|j|jkS)Nz&__eq__ cannot be invoked on {} and {}.) isinstancer TypeErrorformatstr __class__encoding fingerprintr,r0s r-__eq__zCharsetMatch.__eq__(ss%.. 8??((#dn*=*=  }.X43CuGX3XXr/cNt|tstt|j|jz }t|j|jz }|dkr<|dkr6|dkr |j|jkr|j|jkS|j|jkS|j|jkS)zQ Implemented to make sorted available upon CharsetMatches items. g{Gz?g{Gz?r )r3r ValueErrorabschaos coherencemulti_byte_usage)r,r0chaos_differencecoherence_differences r-__lt__zCharsetMatch.__lt__1s%..  tzEK788"4>EO#CDD d " "';d'B'B3&&4>U_+L+L,u/EEE>EO3 3zEK''r/cjdtt|t|jz z S)N?)lenr6rawr,s r-rAzCharsetMatch.multi_byte_usageDs&ST^^c$(mm333r/cptjdttt |dS)z Check once again chaos in decoded text, except this time, with full content. Use with caution, this can be very slow. Notice: Will be removed in 3.0 z=chaos_secondary_pass is deprecated and will be removed in 3.0rF)warningswarnDeprecationWarningrr6rIs r-chaos_secondary_passz!CharsetMatch.chaos_secondary_passHs6  K    #d))S)))r/c:tjdtdS)zy Coherence ratio on the first non-latin language detected if ANY. Notice: Will be removed in 3.0 z)r5r8r9rIs r-__repr__zCharsetMatch.__repr__us.55dmTEUVVVr/ct|tr||kr'td|jd|_|j|dS)Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r3rr=r5r7r+r'appendr:s r- add_submatchzCharsetMatch.add_submatchxsk%.. %4--MTTO    E"""""r/c|jSN)r"rIs r-r8zCharsetMatch.encodings ~r/cg}tjD]F\}}|j|kr||&|j|kr||G|S)z Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855. )ritemsr8r])r, also_known_asups r-encoding_aliaseszCharsetMatch.encoding_aliasessn  MOO ( (DAq}!!$$Q''''!##$$Q'''r/c|jSr`r%rIs r-bomzCharsetMatch.bom ##r/c|jSr`rhrIs r-byte_order_markzCharsetMatch.byte_order_markrjr/c$d|jDS)z Return the complete list of possible languages found in decoded sequence. Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'. cg|] }|d S)r).0es r- z*CharsetMatch.languages..s...!...r/r$rIs r-rzCharsetMatch.languagess /.do....r/c|jshd|jvrdSddlm}m}t |jr||jn||j}t|dksd|vrdS|dS|jddS)z Most probable language found in decoded sequence. If none were detected or inferred, the property will return "Unknown". asciiEnglishr)encoding_languagesmb_encoding_languagesz Latin BasedUnknown)r$could_be_from_charsetcharset_normalizer.cdrwrxrr8rG)r,rwrxrs r-languagezCharsetMatch.languages  $444 y X W W W W W W W*$-887%%dm444'' 66  9~~""my&@&@ yQ< q!!$$r/c|jSr`)r#rIs r-r?zCharsetMatch.chaoss $$r/c:|jsdS|jddS)Nr rrrsrIs r-r@zCharsetMatch.coherences# 3q!!$$r/c4t|jdzdSNd)ndigits)roundr?rIs r- percent_chaoszCharsetMatch.percent_chaossTZ#%q1111r/c4t|jdzdSr)rr@rIs r-percent_coherencezCharsetMatch.percent_coherencesT^c)15555r/c|jS)z+ Original untouched bytes. )r!rIs r-rHzCharsetMatch.raws }r/c|jSr`)r'rIs r-submatchzCharsetMatch.submatchs |r/c2t|jdkSNr)rGr'rIs r- has_submatchzCharsetMatch.has_submatchs4<  1$$r/c|j|jSdt|D}ttd|D|_|jS)Nc,g|]}t|Sro)r)rpchars r-rrz*CharsetMatch.alphabets..s-   $(M$     r/ch|]}||Sroro)rprs r- z)CharsetMatch.alphabets..s+L+L+L!!+LA+L+L+Lr/)r&r6sortedlist)r,detected_rangess r- alphabetszCharsetMatch.alphabetssj   +' '  ,/II    &d+L+L+L+L+L&M&MNN##r/c6|jgd|jDzS)z The complete list of encoding that output the exact SAME str result and therefore could be the originating encoding. This list does include the encoding available in property 'encoding'. cg|] }|j Sro)r8)rpms r-rrz6CharsetMatch.could_be_from_charset..s"D"D"D!1:"D"D"Dr/)r"r'rIs r-rzz"CharsetMatch.could_be_from_charsets%"D"Dt|"D"D"DDDr/c|Sz> Kept for BC reasons. Will be removed in 3.0. rorIs r-firstzCharsetMatch.first  r/c|SrrorIs r-bestzCharsetMatch.bestrr/utf_8r8c|j |j|kr/||_t||d|_|jS)z Method to get re-encoded bytes payload using given target encoding. Default to UTF-8. Any errors will be simply ignored by the encoder NOT replaced. Nreplace)r*r6encoder))r,r8s r-outputzCharsetMatch.outputsI  (D,AX,M,M$,D !#&t99#3#3Hi#H#HD ##r/cht|S)zw Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one. )rr hexdigestrIs r-r9zCharsetMatch.fingerprint s& dkkmm$$..000r/r`)r0rr1N)r1r)r))__name__ __module__ __qualname__bytesr6floatboolr r.objectr;rDpropertyrArNrPrrVrYr[r^r8r rfrirlrr|r?r@rrrHrrrrzrrrr9ror/r-rrs~*.'''' '  ' & '"#''''2YFYtYYYY(F(t((((&4%444X4 *e * * *X * U   X  67 6 6 6X 6 W#WWWW # # # ##X $s)   X $T$$$X$$$$$X$/49///X/%#%%%X%6%u%%%X%%5%%%X% 2u222X265666X6UX $~.X%d%%%X% $49 $ $ $X $EtCyEEEXE  $ $s $ $ $ $ $1S111X111r/rceZdZdZddeefdZdeefdZde e e fdefdZ de fd Z defd Zdeddfd Zded fd Zded fdZdS)CharsetMatchesz Container with every CharsetMatch items ordered by default from most probable to the less one. Act like a list(iterable) but does not implements all related methods. Nresultsc6|rt|ng|_dSr`)r_results)r,rs r-r.zCharsetMatches.__init__s+2:w r/r1c#$K|jEd{VdSr`rrIs r-__iter__zCharsetMatches.__iter__s&=         r/itemct|tr |j|St|tr't |d}|jD]}||jvr|cSt )z Retrieve a single item either by its position or encoding name (alias may be used here). Raise KeyError upon invalid index or encoding not present in results. F)r3intrr6rrzKeyError)r,rresults r- __getitem__zCharsetMatches.__getitem__!sv dC  '=& & dC  "T5))D- " "6777!MMM8r/c*t|jSr`rGrrIs r-__len__zCharsetMatches.__len__/s4=!!!r/c2t|jdkSrrrIs r-__bool__zCharsetMatches.__bool__2s4=!!A%%r/ct|ts4tdt |jt |jtkrB|j D]:}|j |j kr(|j |j kr| |dS;|j |t|j |_ dS)z~ Insert a single match. Will be inserted accordingly to preserve sort. Can be inserted as a submatch. z-Cannot append instance '{}' to CharsetMatchesN)r3rr=r5r6r7rGrHrrr9r?r^r]r)r,rmatchs r-r]zCharsetMatches.append5s $ -- ?FF''  tx==, , ,  $(888U[DJ=V=V&&t,,,FF T"""t}-- r/rc.|jsdS|jdS)zQ Simply return the first match. Strict equivalent to matches[0]. NrrrIs r-rzCharsetMatches.bestIs } 4}Qr/c*|S)zP Redundant method, call the method best(). Kept for BC reasons. )rrIs r-rzCharsetMatches.firstQsyy{{r/r`)rrr__doc__r rr.r rrrr6rrrrr]r rrror/r-rrs% ;;\ 2;;;;!(<0!!!! c3h L    """""&$&&&&.<.D....( h~.    x/r/rceZdZdedeedeedeededeededed ed eed efd Ze d e ee ffdZ d efdZ dS)CliDetectionResultpathr8rfalternative_encodingsr|rrr?r@ unicode_path is_preferredc ||_| |_||_||_||_||_||_||_||_| |_ | |_ dSr`) rrr8rfrr|rrr?r@r) r,rr8rfrr|rrr?r@rrs r-r.zCliDetectionResult.__init__]s\ (  0%:"  ", "(r/r1c |j|j|j|j|j|j|j|j|j|j |j d S)N rr8rfrr|rrr?r@rrrrIs r-__dict__zCliDetectionResult.__dict__wsOI $ 5%)%? "1Z - -   r/c0t|jddS)NT) ensure_asciiindent)rrrIs r-to_jsonzCliDetectionResult.to_jsonsT]a@@@@r/N)rrrr6r r rrr.rr rrrror/r-rr\s))3-)s) ) $Cy )  )9))))sm)))))4  $sCx.    X  AAAAAAAr/r)#rK collectionsrencodings.aliasesrhashlibrjsonrrertypingrr r r r r rconstantrrmdrutilsrrrrrr6rCoherenceMatchrrror/r-rs%%%%%%DDDDDDDDDDDDDDDDDD========CCCCCCCCCCD1D1D1D1D1D1D1D1N@@@@@@@@FsEz"',A,A,A,A,A,A,A,A,A,Ar/