jfG ddlmZddlmZmZddlmZmZddlm Z m Z m Z m Z m Z mZmZmZmZmZmZmZmZmZmZGddZGdd eZGd d eZGd d eZGddeZGddeZGddeZGddeZGddeZ dee!dee!de"fdZ#ed d%d e!d!e$d"e"de$fd#Z%d$S)&) lru_cache)ListOptional)COMMON_SAFE_ASCII_CHARACTERSUNICODE_SECONDARY_RANGE_KEYWORD)is_accentuatedis_asciiis_case_variableis_cjk is_emoticon is_hangul is_hiragana is_katakanais_latinis_punctuation is_separator is_symbolis_thai remove_accent unicode_rangecVeZdZdZdedefdZdeddfdZd dZe de fdZ dS) MessDetectorPluginzy Base abstract class used for mess detection plugins. All detectors MUST extend and implement given methods. characterreturnct)z@ Determine if given character should be fed in. NotImplementedErrorselfrs u/builddir/build/BUILD/imunify360-venv-2.3.5/opt/imunify360/venv/lib/python3.11/site-packages/charset_normalizer/md.pyeligiblezMessDetectorPlugin.eligible "!Nct)z The main routine to be executed upon character. Insert the logic in witch the text would be considered chaotic. rrs r!feedzMessDetectorPlugin.feed$s "!r$ct)zB Permit to reset the plugin to the initial state. rr s r!resetzMessDetectorPlugin.reset+r#r$ct)z Compute the chaos ratio based on what your feed() has seen. Must NOT be lower than 0.; No restriction gt 0. rr(s r!ratiozMessDetectorPlugin.ratio1s "!r$rN) __name__ __module__ __qualname____doc__strboolr"r&r)propertyfloatr+r$r!rrs "#"$"""" "c"d"""""""" "u"""X"""r$rcZeZdZd dZdedefdZdeddfdZd dZe de fdZ dS) TooManySymbolOrPunctuationPluginrNcLd|_d|_d|_d|_d|_dS)NrF)_punctuation_count _symbol_count_character_count_last_printable_char_frenzy_symbol_in_wordr(s r!__init__z)TooManySymbolOrPunctuationPlugin.__init__;s0"# !$(!&+###r$rc*|SN isprintablers r!r"z)TooManySymbolOrPunctuationPlugin.eligibleC$$&&&r$c(|xjdz c_||jkro|tvrft|r|xjdz c_nF|dur0t |r!t|dur|xjdz c_||_dS)NrF) r;r<rrr9isdigitrr r:rs r!r&z%TooManySymbolOrPunctuationPlugin.feedFs " 2 2 2!===i(( (''1,'''!!##u,,i((- **e33""a'""$-!!!r$c0d|_d|_d|_dSNr)r9r;r:r(s r!r)z&TooManySymbolOrPunctuationPlugin.resetXs "# !r$c^|jdkrdS|j|jz|jz }|dkr|ndS)Nrg333333?)r;r9r:)r ratio_of_punctuations r!r+z&TooManySymbolOrPunctuationPlugin.ratio]sK  A % %3  #d&8 8  ! "(r1r2r"r&r)r3r4r+r5r$r!r7r7:s,,,,'#'$''''.c.d....$ LuLLLXLLLr$r7cZeZdZd dZdedefdZdeddfdZd dZe de fdZ dS) TooManyAccentuatedPluginrNc"d|_d|_dSrHr;_accentuated_countr(s r!r>z!TooManyAccentuatedPlugin.__init__j !"#r$rc*|Sr@)isalphars r!r"z!TooManyAccentuatedPlugin.eligiblens  """r$ch|xjdz c_t|r|xjdz c_dSdSNr)r;r rQrs r!r&zTooManyAccentuatedPlugin.feedqsJ " ) $ $ )  # #q ( # # # # ) )r$c"d|_d|_dSrHrPr(s r!r)zTooManyAccentuatedPlugin.resetwrRr$cN|jdkrdS|j|jz }|dkr|ndS)NrrJgffffff?rP)r ratio_of_accentuations r!r+zTooManyAccentuatedPlugin.ratio{s@  A % %3  #d&; ; )>(E(E$$3Nr$r,rLr5r$r!rNrNis$$$$###$####)c)d)))) $$$$OuOOOXOOOr$rNcZeZdZd dZdedefdZdeddfdZd dZe de fdZ dS) UnprintablePluginrNc"d|_d|_dSrH)_unprintable_countr;r(s r!r>zUnprintablePlugin.__init__s"# !r$rcdSNTr5rs r!r"zUnprintablePlugin.eligibletr$c|dur,|dur|dkr|xjdz c_|xjdz c_dS)NFr)isspacerBr]r;rs r!r&zUnprintablePlugin.feedsi     5 ( (%%''500V##  # #q ( # # "r$cd|_dSrH)r]r(s r!r)zUnprintablePlugin.resets"#r$c@|jdkrdS|jdz|jz S)NrrJ)r;r]r(s r!r+zUnprintablePlugin.ratios+  A % %3'!+t/DDDr$r,rLr5r$r!r[r[s""""#$#c#d####$$$$EuEEEXEEEr$r[cZeZdZd dZdedefdZdeddfdZd dZe de fdZ dS) SuspiciousDuplicateAccentPluginrNc0d|_d|_d|_dSrH_successive_countr;_last_latin_characterr(s r!r>z(SuspiciousDuplicateAccentPlugin.__init__s !" !%)"""r$rcH|ot|Sr@)rTrrs r!r"z(SuspiciousDuplicateAccentPlugin.eligibles!  "":x ':'::r$cl|xjdz c_|jt|rt|jrr|r)|jr|xjdz c_t |t |jkr|xjdz c_||_dSrV)r;rlr isupperrkrrs r!r&z$SuspiciousDuplicateAccentPlugin.feeds "  & 2y)) 3t9:: 3  "" ,t'A'I'I'K'K ,&&!+&&Y''=9S+T+TTT&&!+&&%."""r$c0d|_d|_d|_dSrHrjr(s r!r)z%SuspiciousDuplicateAccentPlugin.resets !" !%)"""r$c@|jdkrdS|jdz|jz S)NrrJrE)r;rkr(s r!r+z%SuspiciousDuplicateAccentPlugin.ratios+  A % %3&*d.CCCr$r,rLr5r$r!rhrhs**** ;#;$;;;; /c /d / / / /**** DuDDDXDDDr$rhcZeZdZd dZdedefdZdeddfdZd dZe de fdZ dS) SuspiciousRangerNc0d|_d|_d|_dSrH)"_suspicious_successive_range_countr;_last_printable_seenr(s r!r>zSuspiciousRange.__init__s 23/ !$(!!!r$rc*|Sr@rArs r!r"zSuspiciousRange.eligiblerCr$cD|xjdz c_|st|s |tvr d|_dS|j ||_dSt |j}t |}t ||r|xjdz c_||_dSrV)r;rcrrrvr is_suspiciously_successive_rangeru)r runicode_range_aunicode_range_bs r!r&zSuspiciousRange.feeds "      i(( 888(,D % F  $ ,(1D % F'  %  ( 22 +O_ M M 9  3 3q 8 3 3$-!!!r$c0d|_d|_d|_dSrH)r;rurvr(s r!r)zSuspiciousRange.resets !23/$(!!!r$cT|jdkrdS|jdz|jz }|dkrdS|S)NrrJrEg?)r;ru)r ratio_of_suspicious_range_usages r!r+zSuspiciousRange.ratiosH  A % %3  3a 7  !+"' +S 0 03..r$r,rLr5r$r!rsrss)))) '#'$''''.c.d....2))))  /u / / /X / / /r$rscZeZdZd dZdedefdZdeddfdZd dZe de fdZ dS) SuperWeirdWordPluginrNcd|_d|_d|_d|_d|_d|_d|_d|_d|_dS)NrF) _word_count_bad_word_count_foreign_long_count_is_current_word_bad_foreign_long_watchr;_bad_character_count_buffer_buffer_accent_countr(s r!r>zSuperWeirdWordPlugin.__init__sO #$ $)!#( !$%! $%!!!r$rcdSr_r5rs r!r"zSuperWeirdWordPlugin.eligible r`r$c|rd|j|g|_t|r|xjdz c_|jdur|t |dust|r\t|durKt|dur:t|dur)t|durt|durd|_dS|jsdS| st|st|r"|jr|xjdz c_t!|j}|xj|z c_|dkre|j|z dkrd|_t|jdr6|jdr|xjdz c_d|_|dkr|jr|xjdz c_d|_|jr9|xjdz c_|xjt!|jz c_d|_d|_d|_d |_dS|d vr>|dur*t1|rd|_|xj|z c_dSdSdSdS) NrrFTg(\?r>_-<=>|~)rTjoinrr rrrr rrrrrcrrrlenr;rrorrrrFr)r r buffer_lengths r!r&zSuperWeirdWordPlugin.feeds      77DL)#<==DLi(( /))Q.))(E11i((E11^I5N5N19%%..i((E11 **e33 **e33I&&%//+/( F|  F     " )#<#<" &@LY@W@W" &l" &    !   --M  ! !] 2 ! !!!,}->>)),1)',D $DL()D % % % @ @ @!!##u,,)$$-)-D % LLI %LLLL A @,,,,r$cvd|_d|_d|_d|_d|_d|_d|_d|_dS)NrFr)rrrrrr;rrr(s r!r)zSuperWeirdWordPlugin.resetDsG $)!#(   !$%!#$   r$cP|jdkr |jdkrdS|j|jz S)N rrJ)rrrr;r(s r!r+zSuperWeirdWordPlugin.ratioNs3  r ! !d&>!&C&C3(4+@@@r$r,rLr5r$r!rrs & & & &#$4&c4&d4&4&4&4&l%%%%AuAAAXAAAr$rc^eZdZdZd dZdedefdZdeddfdZd dZ e de fd Z dS) CjkInvalidStopPluginu GB(Chinese) based encoding often render the stop incorrectly when the content does not fit and can be easily detected. Searching for the overuse of '丅' and '丄'. rNc"d|_d|_dSrH_wrong_stop_count_cjk_character_countr(s r!r>zCjkInvalidStopPlugin.__init__\!"$%!!!r$rcdSr_r5rs r!r"zCjkInvalidStopPlugin.eligible`r`r$ct|dvr|xjdz c_dSt|r|xjdz c_dSdS)N>丄丅r)rr rrs r!r&zCjkInvalidStopPlugin.feedcsZ  & &  " "a ' " " F )   +  % % * % % % % + +r$c"d|_d|_dSrHrr(s r!r)zCjkInvalidStopPlugin.resetjrr$c:|jdkrdS|j|jz S)NrJ)rrr(s r!r+zCjkInvalidStopPlugin.rations&  $r ) )3%(AAAr$r,) r-r.r/r0r>r1r2r"r&r)r3r4r+r5r$r!rrVs &&&&#$+c+d++++&&&&BuBBBXBBBr$rcZeZdZd dZdedefdZdeddfdZd dZe de fdZ dS) ArchaicUpperLowerPluginrNchd|_d|_d|_d|_d|_d|_d|_dS)NFrT)_buf_character_count_since_last_sep_successive_upper_lower_count#_successive_upper_lower_count_finalr;_last_alpha_seen_current_ascii_onlyr(s r!r>z ArchaicUpperLowerPlugin.__init__vs? /0,-.*340 ! $#'   r$rcdSr_r5rs r!r"z ArchaicUpperLowerPlugin.eligibler`r$c|ot|}|du}|r|jdkrt|jdkr4|dur|jdur|xj|jz c_d|_d|_d|_d|_|xj dz c_ d|_dS|jdurt|durd|_|j| r|j s-| rB|j r)|jdur|xjdz c_d|_nd|_nd|_|xj dz c_ |xjdz c_||_dS)NFr@rTrE) rTr rrFrrrrrr;r roislower)r r is_concerned chunk_seps r!r&zArchaicUpperLowerPlugin.feeds ((**J/? /J/J  E)  =AA4::%%''500,558868823D .34D 0$(D !DI  ! !Q & ! !'+D $ F  #t + +0C0Cu0L0L',D $  ,!!## "(=(E(E(G(G "!!## "(,(=(E(E(G(G "9$$66!;66 %DII $DII!  " ,,1,, )r$chd|_d|_d|_d|_d|_d|_d|_dS)NrFT)r;rrrrrrr(s r!r)zArchaicUpperLowerPlugin.resets? !/0,-.*340 $ #'   r$c:|jdkrdS|j|jz S)NrrJ)r;rr(s r!r+zArchaicUpperLowerPlugin.ratios&  A % %37$:OOOr$r,rLr5r$r!rrus ( ( ( (#$(*c(*d(*(*(*(*T((((PuPPPXPPPr$rrzr{rc||dS||krdSd|vrd|vrdSd|vsd|vrdSd|vsd|vr d|vsd|vrdS|d|d}}|D]}|tvr ||vrdS|dv|dv}}|s|r d |vsd |vrdS|r|rdSd |vsd |vrd |vsd |vrdS|d ks|d krdSd |vs d |vs|d vr|d vrd |vsd |vrdSd|vsd|vrdSdS)za Determine if two Unicode range seen next to each other can be considered as suspicious. NTFLatin Emoticons Combining )HiraganaKatakanaCJKHangulz Basic Latin)rr PunctuationForms)splitr)rzr{keywords_range_akeywords_range_belrange_a_jp_charsrange_b_jp_charss r!ryrys /"9t/))u/!!g&@&@uo%%)G)Gu ?""g&@&@&&+*H*Hu)8)>)> **S!!' 0 0 0  ! ! !55 "   33 ' ,   E_$<$<u,u?""h/&A&A O # #u'?'?5 m + +-/O/O5   E_$<$<333 7 7 7 O + +}/O/O5 o % %O)C)C5 4r$i)maxsize皙?Fdecoded_sequencemaximum_thresholddebugcdtD}t|dz}d}|dkrd}n |dkrd}nd}t|d zt |D]m\}}|D],} | |r| |-|d kr ||zd ks ||dz kr!td |D}||krnn|r|D]} t| j | j t|d S) zw Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier. c"g|] }| Sr5r5).0md_classs r! zmess_ratio..s+ r$rrJi ir rc3$K|] }|jV dSr@)r+)rdts r! zmess_ratio..&s$!?!?r"(!?!?!?!?!?!?r$) r__subclasses__rzipranger"r&sumprint __class__r+round) rrr detectorslengthmean_mess_ratio!intermediary_mean_mess_ratio_calcrindexdetectorrs r! mess_ratiorsh#5#D#D#F#FI! " "Q &FO ||,.)) 4,.)),/) 04 7vGG   5! ) )H  ++ ) i((( AII%"CCqHH fqj !!?!?Y!?!?!???O"333 * * *B ", ) ) ) ) ! $ $$r$N)rF)& functoolsrtypingrrconstantrrutilsr r r r r rrrrrrrrrrrr7rNr[rhrsrrrr1r2ryr4rr5r$r!rs[!!!!!!!!SSSSSSSS&""""""""D,L,L,L,L,L'9,L,L,L^OOOOO1OOO8EEEEE*EEE8"D"D"D"D"D&8"D"D"DJ3/3/3/3/3/(3/3/3/lWAWAWAWAWA-WAWAWAtBBBBB-BBB>IPIPIPIPIP0IPIPIPXCc]C5=c]C CCCCL 4IN'%'%'%.3'%BF'% '%'%'%'%'%'%r$