jf\$  ddlZn#e$rddlZYnwxYwddlZddlZddlmZddlmZddl m Z ddl m Z ddl mZmZmZmZmZddlmZdd lmZmZmZmZmZmZe e d ed efd Ze e d ed efdZe e d ed eefdZ e e d ed efdZ!d ed efdZ"e e d ed efdZ#e e d ed efdZ$e e d ed efdZ%e e d ed efdZ&e e d ed efdZ'd ed efdZ(e e d ed efdZ)e e d ed efdZ*e e d ed efdZ+e e d ed efdZ,e e d ed efdZ-e e.e ded efdZ/d8d e0d!e1d eefd"Z2e d# d$ed efd%Z3d e0d eeee0ffd&Z4d'ed efd(Z5d9d*ed+ed efd,Z6d-ed eefd.Z7d/ed0ed e8fd1Z9d/ed0ed efd2Z:d3ej;d4fd$ed5e1d6ed dfd7Z!@!@ I I % %    & 4r+cX tj|}n#t$rYdSwxYwd|vS)NFLATINrrs ris_latinr5CsF!&y11 uu k !!r cV |dn#t$rYdSwxYwdS)NasciiFT)encodeUnicodeEncodeErrorrs ris_asciir;LsE!!!! uu 4s  &&cdtj|}d|vrdSt|}|dSd|vS)NPTF Punctuationrcategoryr2rcharacter_categorycharacter_ranges ris_punctuationrDTsG$-i88    t#I..Ou O ++r+cltj|}d|vsd|vrdSt|}|dSd|vS)NSNTFFormsr?rAs r is_symbolrIcsR$-i88    C+=$=$=t#I..Ou o %%r+c0t|}|dSd|vS)NF Emoticons)r2)rrCs r is_emoticonrLrs%#I..Ou / ))r+cf|s|dvrdStj|}d|vS)N>|+,;<>TZ)isspacerr@rrBs r is_separatorrW|sCi+KKKt$-i88 $ $$r+cV||kSN)islowerisupperr:s ris_case_variabler\s%     )"3"3"5"5 55r+c6tj|}|dkS)NCo)rr@rVs ris_private_use_onlyr_s$-i88  %%r+cX tj|}n#t$rYdSwxYwd|vS)NFCJKrrcharacter_names ris_cjkrdsH$))44 uu N ""r cX tj|}n#t$rYdSwxYwd|vS)NFHIRAGANArrbs r is_hiraganargH$))44 uu  ''r cX tj|}n#t$rYdSwxYwd|vS)NFKATAKANArrbs r is_katakanarkrhr cX tj|}n#t$rYdSwxYwd|vS)NFHANGULrrbs r is_hangulrnsH$))44 uu ~ %%r cX tj|}n#t$rYdSwxYwd|vS)NFTHAIrrbs ris_thairqsH$))44 uu ^ ##r r0cDtfdtDS)Nc3 K|]}|vV dSrY).0keywordr0s r z-is_unicode_range_secondary..s(TTw*$TTTTTTr+)anyr)r0s`ris_unicode_range_secondaryrys' TTTT4STTT T TTr+sequence search_zonec t|tstt|}t t |dt ||dd}t|dkrdS|D][}| dd}tj D]\}}||kr|ccS||kr|ccS\dS)zW Extract using ASCII-only decoder any specified encoding in the first n-bytes. Nr7ignoreerrorsr-_) isinstancebytes TypeErrorlenrrmindecodelowerreplacerr.)r{r|seq_lenresultsspecified_encodingencoding_alias encoding_ianas rany_specified_encodingrs h & &(mmG',3w ,,,-44WX4NNG  7||qt%%%/5577??SII-4]__ % % )NM!333$$$$$$ 222$$$$$$3 % 4r+rc|dvp>ttjd|jt S)zQ Verify is a specific encoding is a multi byte one based on it IANA name > utf_7utf_8utf_16utf_32 utf_16_be utf_16_le utf_32_be utf_32_le utf_8_sig encodings.{}) issubclass importlib import_moduleformatrr )rs ris_multi_byte_encodingrsL      5 5d ; ;<<O#   r+ctD]I}t|}t|tr|g}|D]}||r||fccS JdS)z9 Identify and extract SIG/BOM in given sequence. )Nr+)rrr startswith)r{ iana_encodingmarksmarks ridentify_sig_or_bomrs (++ }- eU # # GE + +D""4(( +$d****** + + 9r+rc |dvS)N>rrrt)rs rshould_strip_sig_or_bomr s  4 44r+Tcp_namestrictc|dd}tjD]\}}|||fvr|cS|r"t d||S)Nrrz Unable to retrieve IANA for '{}')rrrr.rr)rrrrs r iana_namersmmoo%%c3//G)0!!% ~}5 5 5  6M;BB7KKLLL Nr+decoded_sequencect}|D])}t|}|||*t|SrY)setr2addlist)rrangesrrCs r range_scanrsQ UUF%$$ ' 22  "  ?#### <<r+ iana_name_a iana_name_bct|st|rdStjd|j}tjd|j}|d}|d}d}t dD]C}t |g}||||kr|dz }D|dz S) Ngrr~rrr )rrrrrrangerr) rr decoder_a decoder_bid_aid_bcharacter_match_counti to_be_decodeds r cp_similarityr+sk**.D[.Q.Qs'(=(=k(J(JKK^I'(=(=k(J(JKK^I 9H % % %D 9H % % %D 3ZZ''qc ;;} % %])C)C C C !Q & ! 3 &&r+c2|tvo|t|vS)z Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using the function cp_similarity. )r)rrs r is_cp_similarr@s% -- ? 1+> >r+charset_normalizerz)%(asctime)s | %(levelname)s | %(message)slevel format_stringctj|}||tj}|tj|||dSrY)logging getLoggersetLevel StreamHandler setFormatter Formatter addHandler)rrrloggerhandlers rset_logging_handlerrKsm  t $ $F OOE#%%G *=99::: gr+)rz)T)= unicodedata2r ImportErrorrrcodecsrencodings.aliasesr functoolsrrertypingrrr r r _multibytecodecr constantrrrrrrstrboolrr*r2r5r;rDrIrLrWr\r_rdrgrkrnrqrryrr'rrrrrrfloatrrINFOrrtr+rrs&&&&&%%%%%%%%%%%%44444444444444777777 *+++ c d   ,+  *+++"S"S""",+" *+++ S Xc]   ,+  *+++""""",+" *+++ ,c ,d , , ,,+ , *+++ & & & & &,+ & *+++*3*4***,+* *+++%C%D%%%,+% *+++66666,+6&3&4&&&&  *+++#c#d###,+# *+++(3(4(((,+( *+++(3(4(((,+( *+++&&&&&,+& *+++$s$t$$$,+$ 33.//000U3U4UUU10UUQT : 3(%E(3-2F,G$53545555  s D C      c    's''''''*s%D             s