U ev.@slddlZddlZddlZddlmZddlmZddlmZddl m Z ddl m Z m Z mZmZmZmZddlmZdd lmZmZmZmZmZmZeed eed d d Zeed eed ddZeed eeed ddZeed eed ddZ eed eed ddZ!eed eed ddZ"eed eed ddZ#eed eed ddZ$eed eed ddZ%eed eed ddZ&eed eed d d!Z'eed eed d"d#Z(eed eed d$d%Z)eed eed d&d'Z*eed eed d(d)Z+eed eed d*d+Z,ee-ed eed,d-d.Z.eed eed d/d0Z/dSe0e1eed2d3d4Z2ed5d eed6d7d8Z3e0eeee0fd9d:d;Z4eed<d=d>Z5dTeeed@dAdBZ6ee edCdDdEZ7eee8dFdGdHZ9eeedFdIdJZ:dKej;dLfee1eddMdNdOZdS)VN)IncrementalDecoder)aliases) lru_cache)findall) GeneratorListOptionalSetTupleUnion)MultibyteIncrementalDecoder)ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATION)maxsize) characterreturncCsfzt|}Wntk r$YdSXd|kpdd|kpdd|kpdd|kpdd|kpdd|kpdd|kpdd |kS) NFz WITH GRAVEz WITH ACUTEz WITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz WITH TILDEz WITH MACRONzWITH RING ABOVE unicodedataname ValueErrorr descriptionrH/opt/hc_python/lib64/python3.8/site-packages/charset_normalizer/utils.pyis_accentuateds&rcCs.t|}|s|S|d}tt|ddS)N r)r decompositionsplitchrint)rZ decomposedcodesrrr remove_accent(s   r'cCs.t|}tD]\}}||kr|SqdS)zK Retrieve the Unicode range official name from a single character. N)ordritems)rZ character_ord range_nameZ ord_rangerrr unicode_range3s  r+cCs.zt|}Wntk r$YdSXd|kS)NFZLATINrrrrris_latinAs r,cCs2t|}d|krdSt|}|dkr*dSd|kS)NPTF Punctuationrcategoryr+rcharacter_categorycharacter_rangerrris_punctuationJs r4cCsBt|}d|ksd|krdSt|}|dkr2dSd|ko@|dkS)NSNTFZFormsZLor/r1rrr is_symbolYs r7cCs$t|}|dkrdSd|kp"d|kS)NFZ EmoticonsZ Pictographs)r+)rr3rrr is_emoticonhsr8cCs.|s|dkrdSt|}d|kp,|dkS)N>|<+>TZ>PdPoPc)isspacerr0)rr2rrr is_separatorrs rBcCs||kSN)islowerisupperrrrris_case_variable|srGcCs.zt|}Wntk r$YdSXd|kS)NFCJKrrZcharacter_namerrris_cjks rJcCs.zt|}Wntk r$YdSXd|kS)NFZHIRAGANArrIrrr is_hiraganas rKcCs.zt|}Wntk r$YdSXd|kS)NFZKATAKANArrIrrr is_katakanas rLcCs.zt|}Wntk r$YdSXd|kS)NFZHANGULrrIrrr is_hanguls rMcCs.zt|}Wntk r$YdSXd|kS)NFZTHAIrrIrrris_thais rNcCs.zt|}Wntk r$YdSXd|kS)NFARABICrrIrrr is_arabics rPcCs6zt|}Wntk r$YdSXd|ko4d|kS)NFrOz ISOLATED FORMrrIrrris_arabic_isolated_forms rQ)r*rcstfddtDS)Nc3s|]}|kVqdSrCr).0keywordr*rr sz-is_unicode_range_secondary..)anyrrTrrTris_unicode_range_secondarysrWcCs(|dko&|dko&|dko&|dkS)NFu)rA isprintablerFrrris_unprintables  rZ )sequence search_zonercCst|tstt|}tt|dt||jddd}t|dkrHdS|D]N}| dd}t D]0\}}||kr|S||krh|SqhqLdS)zW Extract using ASCII-only decoder any specified encoding in the first n-bytes. Nasciiignoreerrorsr-_) isinstancebytes TypeErrorlenrrmindecodelowerreplacerr))r\r]Zseq_lenresultsZspecified_encodingencoding_alias encoding_ianarrrany_specified_encodings"   ro)rrcCs |dkpttd|jtS)zQ Verify is a specific encoding is a multi byte one based on it IANA name > utf_16_beutf_32 utf_32_le utf_16_le utf_8_sigutf_8utf_7 utf_32_beutf_16 encodings.{}) issubclass importlib import_moduleformatrr )rrrris_multi_byte_encodings  r)r\rcCsJtD]@}t|}t|tr |g}|D]}||r$||fSq$qdS)z9 Identify and extract SIG/BOM in given sequence. )N)rrdre startswith)r\ iana_encodingZmarksmarkrrridentify_sig_or_bom s  r)rrcCs|dkS)N>rrryr)rrrrshould_strip_sig_or_bomsrT)cp_namestrictrcCsL|dd}tD]\}}|||fkr|Sq|rHtd||S)Nrbrcz Unable to retrieve IANA for '{}')rjrkrr)rr~)rrrmrnrrr iana_name"s  r)decoded_sequencercCs4t}|D] }t|}|dkr q ||q t|SrC)setr+addlist)rrangesrr3rrr range_scan2s r) iana_name_a iana_name_brc Cst|st|rdStd|j}td|j}|dd}|dd}d}tdD]*}t|g}||||krX|d7}qX|dS) Ngrzr_r`rr )rr|r}r~rrangereri) rrZ decoder_aZ decoder_bZid_aZid_bZcharacter_match_countiZ to_be_decodedrrr cp_similarity@s      rcCs|tko|t|kS)z Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using the function cp_similarity. )r)rrrrr is_cp_similarXs rZcharset_normalizerz)%(asctime)s | %(levelname)s | %(message)s)rlevel format_stringrcCs:t|}||t}|t|||dSrC)logging getLoggersetLevel StreamHandler setFormatter Formatter addHandler)rrrloggerhandlerrrrset_logging_handlercs   r) sequencesrnoffsets chunk_sizebom_or_sig_availablestrip_sig_or_bom sig_payloadis_multi_byte_decoderdecoded_payloadrc cs*|r6|dkr6|D]"} || | |} | s,q4| Vqn|D]} | |} | t|dkrXq:|| | |} |r||dkr||| } | j||rdndd} |r| dkrt|d} |r| d| |krt| | dd D]H}||| } |r|dkr|| } | j|dd} | d| |krqq| Vq:dS) NFr_rr`rr!)rgrirhr)rrnrrrrrrrrchunkZ chunk_endZ cut_sequenceZchunk_partial_size_chkjrrrcut_sequence_chunksps>       r)r[)T)N)?r|rrcodecsrZencodings.aliasesr functoolsrrertypingrrrr r r Z_multibytecodecr Zconstantrrrrrrstrboolrr'r+r,r4r7r8rBrGrJrKrLrMrNrPrQrgrWrZrer%rorrrrrfloatrrINFOrrrrrrrs