7Rewc1@sddlmZmZmZddlmZddlZddlZddlm Z m Z m Z m Z ddlm Z mZddlmZddlmZydd lmZWnek reZYnXydd lmZWn(ek r Gd d d eZYnXed de DZedde DZedde DZeeddgBZejdZeddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3g Zejd4Z iZ!Gd5d6d6eZ"ddEdEd7d8Z%Gd9d:d:eZ&Gd;d<d<e&Z'Gd=d>d>e(Z)Gd?d@d@eZ*GdAdBdBeZ+dCdDZ,dS(Fi(uabsolute_importudivisionuunicode_literals(u text_typeNi(uEOFuspaceCharactersu asciiLettersuasciiUppercase(u encodingsuReparseException(uutils(uStringIO(uBytesIO(uBufferedIOBasecBs|EeZdZdS(uBufferedIOBaseN(u__name__u __module__u __qualname__(u __locals__((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyuBufferedIOBasesuBufferedIOBasecCsg|]}|jdqS(uascii(uencode(u.0uitem((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu s u cCsg|]}|jdqS(uascii(uencode(u.0uitem((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu s cCsg|]}|jdqS(uascii(uencode(u.0uitem((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu s s>st|j||krd|t|j|8}|d7}q'W||g|_dS(Nii(u_bufferedBytesuAssertionErrorulenubufferuposition(uselfuposuoffsetui((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyuseekAsuBufferedStream.seekcCsp|js|j|S|jdt|jkr_|jdt|jdkr_|j|S|j|SdS(Niii(ubufferu _readStreamupositionulenu_readFromBuffer(uselfubytes((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyureadJs     uBufferedStream.readcCstdd|jDS(NcSsg|]}t|qS((ulen(u.0uitem((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu Ts u1BufferedStream._bufferedBytes..(usumubuffer(uself((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu_bufferedBytesSsuBufferedStream._bufferedBytescCsJ|jj|}|jj||jdd7 Normalized stream from source for use by html5lib. source can be either a file-object, local filename or a string. The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) parseMeta - Look for a element containing encoding information u􏿿iu [-]u0([-](?![-])|(?>s u5HTMLUnicodeInputStream.charsUntil..u^%su[%s]+N(ucharsUntilRegExuKeyErroruorduAssertionErrorujoinureucompileumatchuchunku chunkOffsetuNoneu chunkSizeuenduappendu readChunk( uselfu charactersuoppositeucharsucuregexurvumuendur((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu charsUntil0s2   &    u!HTMLUnicodeInputStream.charsUntilcCso|dk rk|jdkr=||j|_|jd7_qk|jd8_|j|j|ksktndS(Nii(uNoneu chunkOffsetuchunku chunkSizeuAssertionError(uselfuchar((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyuunget_s  uHTMLUnicodeInputStream.ungetNF(u__name__u __module__u __qualname__u__doc__u_defaultChunkSizeu__init__uresetu openStreamu _positionupositionucharuNoneu readChunkucharacterErrorsUCS4ucharacterErrorsUCS2uFalseu charsUntiluunget(u __locals__((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyuHTMLUnicodeInputStreams !    (  /uHTMLUnicodeInputStreamcBs}|EeZdZdZdddddZddZddZdddd Z d d Z d d Z ddZ dS(uHTMLBinaryInputStreamuProvides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing incorrect byte-sequences and also provides column and line tracking. cCs|j||_tj||jt|df|_d|_d|_d|_|jddkr|j |||_n|j dS(uInitialises the HTMLInputStream. HTMLInputStream(source, [encoding]) -> Normalized stream from source for use by html5lib. source can be either a file-object, local filename or a string. The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) parseMeta - Look for a element containing encoding information ucertainiidu windows-1252iN( u openStreamu rawStreamuHTMLUnicodeInputStreamu__init__u codecNameu charEncodingu numBytesMetaunumBytesChardetudefaultEncodinguNoneudetectEncodingureset(uselfusourceuencodingu parseMetauchardet((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu__init__xs   uHTMLBinaryInputStream.__init__cCs6tj|jd|jd|_tj|dS(Niureplace(ucodecsu getreaderu charEncodingu rawStreamu dataStreamuHTMLUnicodeInputStreamureset(uself((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyuresets uHTMLBinaryInputStream.resetc CsVt|dr|}n t|}y|j|jWnt|}YnX|S(uvProduces a file object from source. source can be either a file object, local filename or a string. uread(uhasattruBytesIOuseekutelluBufferedStream(uselfusourceustream((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu openStreams  u HTMLBinaryInputStream.openStreamc Cs|j}d}|dkr9|r9|j}d}n|dkr:|r:d}yyddlm}Wn"tk rddlm}YnXg}|}x[|js|jj |j }t |t st |sPn|j||j|qW|j|jd}|jjdWq:tk r6Yq:Xn|dkrXd}|j}nidd6} |j| kr| |j}n||fS(Nucertainu tentativei(uUniversalDetectoruencodingu windows-1252u iso-8859-1(u detectBOMuNoneudetectEncodingMetaucharade.universaldetectoruUniversalDetectoru ImportErroruchardet.universaldetectorudoneu rawStreamureadunumBytesChardetu isinstanceubytesuAssertionErroruappendufeeducloseuresultuseekudefaultEncodingulower( uselfu parseMetauchardetuencodingu confidenceuUniversalDetectorubuffersudetectorubufferu encodingSub((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyudetectEncodingsB             u$HTMLBinaryInputStream.detectEncodingcCs|jddkstt|}|d kr:d}n|dkrJdS||jdkrv|jddf|_nF|jjd|j|df|_td|jd|fdS( Niucertainuutf-16u utf-16-beu utf-16-leuutf-8iuEncoding changed from %s to %s(uutf-16u utf-16-beu utf-16-le(u charEncodinguAssertionErroru codecNameuNoneu rawStreamuseekuresetuReparseException(uselfu newEncoding((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyuchangeEncodings     u$HTMLBinaryInputStream.changeEncodingcCsidtj6dtj6dtj6dtj6dtj6}|jjd}t|t s_t |j |dd}d}|s|j |}d}|s|j |dd }d }qn|jj |r|pd |S( uAttempts to detect at BOM at the start of the stream. If an encoding can be determined from the BOM return the name of the encoding otherwise return Noneuutf-8u utf-16-leu utf-16-beu utf-32-leu utf-32-beiNiii( ucodecsuBOM_UTF8u BOM_UTF16_LEu BOM_UTF16_BEu BOM_UTF32_LEu BOM_UTF32_BEu rawStreamureadu isinstanceubytesuAssertionErrorugetuseek(uselfubomDictustringuencodinguseek((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu detectBOMs   uHTMLBinaryInputStream.detectBOMcCsk|jj|j}t|ts*tt|}|jjd|j}|dkrgd}n|S(u9Report the encoding declared by the meta element iuutf-16u utf-16-beu utf-16-leuutf-8(uutf-16u utf-16-beu utf-16-le( u rawStreamureadu numBytesMetau isinstanceubytesuAssertionErroruEncodingParseruseeku getEncoding(uselfubufferuparseruencoding((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyudetectEncodingMetas    u(HTMLBinaryInputStream.detectEncodingMetaNT( u__name__u __module__u __qualname__u__doc__uNoneuTrueu__init__uresetu openStreamudetectEncodinguchangeEncodingu detectBOMudetectEncodingMeta(u __locals__((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyuHTMLBinaryInputStreamps(  -  uHTMLBinaryInputStreamcBs|EeZdZdZddZddZddZdd Zd d Zd d Z ddZ ddZ e e e Z ddZe eZeddZddZddZddZdS(u EncodingBytesuString-like object with an associated position and various extra methods If the position is ever greater than the string length then an exception is raisedcCs+t|tsttj||jS(N(u isinstanceubytesuAssertionErroru__new__ulower(uselfuvalue((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu__new__&suEncodingBytes.__new__cCs d|_dS(Nii(u _position(uselfuvalue((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu__init__*suEncodingBytes.__init__cCs|S(N((uself((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu__iter__-suEncodingBytes.__iter__cCsV|jd}|_|t|kr/tn|dkrDtn|||dS(Nii(u _positionulenu StopIterationu TypeError(uselfup((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu__next__0s    uEncodingBytes.__next__cCs |jS(N(u__next__(uself((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyunext8suEncodingBytes.nextcCs\|j}|t|kr$tn|dkr9tn|d|_}|||dS(Nii(u _positionulenu StopIterationu TypeError(uselfup((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyuprevious<s    uEncodingBytes.previouscCs+|jt|krtn||_dS(N(u _positionulenu StopIteration(uselfuposition((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu setPositionEs uEncodingBytes.setPositioncCs<|jt|krtn|jdkr4|jSdSdS(Ni(u _positionulenu StopIterationuNone(uself((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu getPositionJs  uEncodingBytes.getPositioncCs||j|jdS(Ni(uposition(uself((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyugetCurrentByteTsuEncodingBytes.getCurrentBytecCsf|j}xM|t|krX|||d}||krK||_|S|d7}q W||_dS(uSkip past a list of charactersiN(upositionulenu _positionuNone(uselfucharsupuc((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyuskipYs    uEncodingBytes.skipcCsf|j}xM|t|krX|||d}||krK||_|S|d7}q W||_dS(Ni(upositionulenu _positionuNone(uselfucharsupuc((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu skipUntiles    uEncodingBytes.skipUntilcCsT|j}|||t|}|j|}|rP|jt|7_n|S(uLook for a sequence of bytes at the start of a string. If the bytes are found return True and advance the position to the byte after the match. Otherwise return False and leave the position alone(upositionulenu startswith(uselfubytesupudataurv((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu matchBytesps  uEncodingBytes.matchBytescCsn||jdj|}|dkrd|jdkrCd|_n|j|t|d7_dStdS(uLook for the next sequence of bytes matching a given sequence. If a match is found advance the position to the last byte of the matchNiiiiT(upositionufindu _positionulenuTrueu StopIteration(uselfubytesu newPosition((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyujumpTo{s  uEncodingBytes.jumpToN(u__name__u __module__u __qualname__u__doc__u__new__u__init__u__iter__u__next__unextupreviousu setPositionu getPositionupropertyupositionugetCurrentByteu currentByteuspaceCharactersBytesuskipu skipUntilu matchBytesujumpTo(u __locals__((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu EncodingBytes"s           u EncodingBytescBs|EeZdZdZddZddZddZdd Zd d Zd d Z ddZ ddZ ddZ dS(uEncodingParseru?Mini parser for detecting character encoding from meta elementscCst||_d|_dS(u3string - the data to work on for encoding detectionN(u EncodingBytesudatauNoneuencoding(uselfudata((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu__init__suEncodingParser.__init__c Csd|jfd|jfd|jfd|jfd|jfd|jff}xw|jD]l}d}xS|D]K\}}|jj|rky|}PWqtk rd}PYqXqkqkW|sXPqXqXW|j S( Ns(udataujumpTo(uself((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu handleCommentsuEncodingParser.handleCommentcCsD|jjtkrdSd}d}x|j}|dkrAdS|ddkr|ddk}|r=|dk r=||_dSq%|ddkr|d}t|}|dk r=||_dSq%|ddkr%t t |d}|j }|dk r=t|}|dk r:|r.||_dS|}q:q=q%q%dS( Nis http-equivis content-typescharsetscontentTF( udatau currentByteuspaceCharactersBytesuTrueuFalseuNoneu getAttributeuencodingu codecNameuContentAttrParseru EncodingBytesuparse(uselfu hasPragmaupendingEncodinguattrutentativeEncodingucodecu contentParser((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu handleMetas:            uEncodingParser.handleMetacCs |jdS(NF(uhandlePossibleTaguFalse(uself((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyuhandlePossibleStartTagsu%EncodingParser.handlePossibleStartTagcCst|j|jdS(NT(unextudatauhandlePossibleTaguTrue(uself((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyuhandlePossibleEndTags u#EncodingParser.handlePossibleEndTagcCs|j}|jtkr9|r5|j|jndS|jt}|dkra|jn+|j}x|dk r|j}qpWdS(Ns(udataujumpTo(uself((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu handleOthersuEncodingParser.handleOthercCs|j}|jttdgB}|dksIt|dksIt|d krYdSg}g}x|dkr~|r~Pnz|tkr|j}Pn^|d krdj|dfS|tkr|j |j n|dkrdS|j |t |}qh|dkr0|j dj|dfSt ||j}|d kr|}xt |}||krt |dj|dj|fS|tkr|j |j q[|j |q[n^|dkrdj|dfS|tkr|j |j n|dkrdS|j |xvt |}|t krcdj|dj|fS|tkr|j |j q/|dkrdS|j |q/dS( u_Return a name,value pair for the next attribute in the stream, if one is found, or Nones/is>s=ss's"N(s>N(s/s>(s's"(udatauskipuspaceCharactersBytesu frozensetuNoneulenuAssertionErrorujoinuasciiUppercaseBytesuappendulowerunextuprevioususpacesAngleBrackets(uselfudataucuattrNameu attrValueu quoteChar((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu getAttributesh $                        uEncodingParser.getAttributeN( u__name__u __module__u __qualname__u__doc__u__init__u getEncodingu handleCommentu handleMetauhandlePossibleStartTaguhandlePossibleEndTaguhandlePossibleTagu handleOtheru getAttribute(u __locals__((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyuEncodingParsers    $    uEncodingParsercBs,|EeZdZddZddZdS(uContentAttrParsercCs"t|tst||_dS(N(u isinstanceubytesuAssertionErrorudata(uselfudata((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu__init__?suContentAttrParser.__init__cCsNy1|jjd|jjd7_|jj|jjdksHdS|jjd7_|jj|jjdkr|jj}|jjd7_|jj}|jj|r|j||jjSdSn]|jj}y+|jjt|j||jjSWn#tk r/|j|dSYnXWntk rIdSYnXdS(Nscharsetis=s"s'(s"s'( udataujumpToupositionuskipu currentByteuNoneu skipUntiluspaceCharactersBytesu StopIteration(uselfu quoteMarku oldPosition((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyuparseCs.       uContentAttrParser.parseN(u__name__u __module__u __qualname__u__init__uparse(u __locals__((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyuContentAttrParser>s uContentAttrParserc Cstt|tr>y|jd}Wq>tk r:dSYq>Xn|rltjd|j}tj |dSdSdS(u{Return the python codec name corresponding to an encoding or None if the string doesn't correspond to a valid encoding.uasciiuN( u isinstanceubytesudecodeuUnicodeDecodeErroruNoneuascii_punctuation_reusubuloweru encodingsuget(uencodingu canonicalName((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyu codecNamees  u codecNameT(-u __future__uabsolute_importudivisionuunicode_literalsupip._vendor.sixu text_typeucodecsureu constantsuEOFuspaceCharactersu asciiLettersuasciiUppercaseu encodingsuReparseExceptionuuutilsuiouStringIOuBytesIOu ImportErroruBufferedIOBaseuobjectu frozensetuspaceCharactersBytesuasciiLettersBytesuasciiUppercaseBytesuspacesAngleBracketsucompileuinvalid_unicode_reusetunon_bmp_invalid_codepointsuascii_punctuation_reucharsUntilRegExuBufferedStreamuNoneuTrueuHTMLInputStreamuHTMLUnicodeInputStreamuHTMLBinaryInputStreamubytesu EncodingBytesuEncodingParseruContentAttrParseru codecName(((u9/tmp/pip-zej_zi-build/pip/_vendor/html5lib/inputstream.pyusJ  "   Jg'