bg9!JddlmZmZddlmZddlmZGddeZdS))ListUnion) CharSetProber) ProbingStatec8eZdZdZdZdZdfd Zdfd Zede fdZ ede fd Z de fd Z de fd Zdefd Zdefd ZdefdZdefdZdeeddfdZdeeddfdZdeeefdefdZedefdZde fdZxZS) UTF1632Proberad This class simply looks for occurrences of zero bytes, and infers whether the file is UTF16 or UTF32 (low-endian or big-endian) For instance, files looking like ( [nonzero] )+ have a good probability to be UTF32BE. Files looking like ( [nonzero] )+ may be guessed to be UTF16BE, and inversely for little-endian varieties. gGz?returnNc2td|_dgdz|_dgdz|_t j|_gd|_d|_ d|_ d|_ d|_ d|_ d|_|dS)NrrrrrF)super__init__position zeros_at_modnonzeros_at_modr DETECTING_statequadinvalid_utf16beinvalid_utf16leinvalid_utf32beinvalid_utf32le'first_half_surrogate_pair_detected_16be'first_half_surrogate_pair_detected_16leresetself __class__s L/opt/cloudlinux/venv/lib64/python3.11/site-packages/chardet/utf1632prober.pyrzUTF1632Prober.__init__)s  C!G !sQw",  LL $$$$7<47<4 c td|_dgdz|_dgdz|_t j|_d|_d|_ d|_ d|_ d|_ d|_ gd|_dS)Nrr Fr)rrrrrrrrrrrrrrrrs r!rzUTF1632Prober.reset8s   C!G !sQw", $$$$7<47<4 LL r"c|rdS|rdS|rdS|rdSdS)Nzutf-32bezutf-32lezutf-16bezutf-16lezutf-16)is_likely_utf32beis_likely_utf32leis_likely_utf16beis_likely_utf16lers r! charset_namezUTF1632Prober.charset_nameFsk  ! ! # # :  ! ! # # :  ! ! # # :  ! ! # # :xr"cdS)Nr)s r!languagezUTF1632Prober.languageSsrr"c2td|jdz S)N?g@maxrr)s r!approx_32bit_charsz UTF1632Prober.approx_32bit_charsW3 +,,,r"c2td|jdz S)Nr0g@r1r)s r!approx_16bit_charsz UTF1632Prober.approx_16bit_charsZr4r"c|}||jkok|jd|z |jkoR|jd|z |jko9|jd|z |jko |jd|z |jko|j SNrr)r3MIN_CHARS_FOR_DETECTIONrEXPECTED_RATIOrrr approx_charss r!r%zUTF1632Prober.is_likely_utf32be]s..00 t;;  a < /$2E E )!!$|3d6II )!!$|3d6II )$Q',69LL )((  r"c|}||jkok|jd|z |jkoR|jd|z |jko9|jd|z |jko |jd|z |jko|j Sr8)r3r;rr<rrr=s r!r&zUTF1632Prober.is_likely_utf32legs..00 t;;   #l 2T5H H )!!$|3d6II )!!$|3d6II )!!$|3d6II )((  r"c|}||jkoU|jd|jdz|z |jko.|jd|jdz|z |jko|j S)Nrr:rr9)r6r;rr<rrr=s r!r'zUTF1632Prober.is_likely_utf16beq..00 t;;  !! $t';A'> >, N! " )"1%(9!(<< L!" )((  r"c|}||jkoU|jd|jdz|z |jko.|jd|jdz|z |jko|j S)Nrr9rr:)r6r;rr<rrr=s r!r(zUTF1632Prober.is_likely_utf16le{rAr"rcH|ddks:|ddks.|ddkr)|ddkrd|dcxkrdkr nnd|_|ddks;|ddks/|ddkr,|ddkr"d|dcxkrdkrnd Sd|_d Sd Sd Sd S) z Validate if the quad of bytes is valid UTF-32. UTF-32 is valid in the range 0x00000000 - 0x0010FFFF excluding 0x0000D800 - 0x0000DFFF https://en.wikipedia.org/wiki/UTF-32 rrr9Tr:N)rr)rrs r!validate_utf32_charactersz'UTF1632Prober.validate_utf32_characterss GqLLAw~~Q1 aA$$q'2I2I2I2IT2I2I2I2I2I#'D GqLLAw~~Q1 aA$$q'2I2I2I2IT2I2I2I2I2I2I#'D  2I2Ir"pairc|js>>..ty1~>>>Avv!$'''1,''''$T***a/*** MMQ MMMzr"c|jtjtjhvr|jS|dkrtj|_n|jdkrtj|_|jS)Ng?i)rrNOT_MEFOUND_ITget_confidencerr)s r!rOzUTF1632Prober.statesf ;<. 0EF F F;     4 ' '&/DKK ]X % %'-DK{r"c|s<|s(|s|rdndS)Ng333333?g)r(r'r&r%r)s r!rVzUTF1632Prober.get_confidencesh&&(( ))++ ))++   ))++  DD r")r N) __name__ __module__ __qualname____doc__r;r<rrpropertystrr*r.floatr3r6boolr%r&r'r(rintrGrLrbytes bytearrayrrRrOrV __classcell__)r s@r!r r s!N       ! ! ! ! ! ! c   X #X-E-----E---- 4     4     4     4    (d3i(D((((,,d3i,D,,,,@ U5)#34      |   X            r"r N)typingrr charsetproberrenumsrr r-r"r!rgs*((((((F F F F F MF F F F F r"