7Re,c @s ddlmZmZmZy eZWnek r:YnXddlmZddl m Z ddl m Z ddl m Z m Z ddl mZmZmZddl mZmZdd l mZdd lmZdd lmZee ZGd d d eZdS(i(uabsolute_importudivisionuunicode_literals(udequei(uspaceCharacters(uentities(u asciiLettersuasciiUpper2Lower(udigitsu hexDigitsuEOF(u tokenTypesu tagTokenTypes(ureplacementCharacters(uHTMLInputStream(uTriec s|EeZdZdZddddddfddZddZddZdddd Z d d Z d d Z ddZ ddZddZddZddZddZddZddZddZd d!Zd"d#Zd$d%Zd&d'Zd(d)Zd*d+Zd,d-Zd.d/Zd0d1Zd2d3Zd4d5Z d6d7Z!d8d9Z"d:d;Z#d<d=Z$d>d?Z%d@dAZ&dBdCZ'dDdEZ(dFdGZ)dHdIZ*dJdKZ+dLdMZ,dNdOZ-dPdQZ.dRdSZ/dTdUZ0dVdWZ1dXdYZ2dZd[Z3d\d]Z4d^d_Z5d`daZ6dbdcZ7dddeZ8dfdgZ9dhdiZ:djdkZ;dldmZ<dndoZ=dpdqZ>drdsZ?dtduZ@dvdwZAdxdyZBdzd{ZCd|d}ZDd~dZEddZFddZGddZHddZIddZJddZKddZLddZMddZNddZOS(u HTMLTokenizeru  This class takes care of tokenizing HTML. * self.currentToken Holds the token that is currently being processed. * self.state Holds a reference to the method to be invoked... XXX * self.stream Points to HTMLInputStream object. cszt|||||_||_||_||_d|_g|_|j|_ d|_ d|_ t t|jdS(NF(uHTMLInputStreamustreamuparserulowercaseElementNameulowercaseAttrNameuFalseu escapeFlagu lastFourCharsu dataStateustateuescapeuNoneu currentTokenusuperu HTMLTokenizeru__init__(uselfustreamuencodingu parseMetau useChardetulowercaseElementNameulowercaseAttrNameuparser(u __class__(u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyu__init__%s        uHTMLTokenizer.__init__ccs}tg|_xg|jrxx6|jjrVitdd6|jjjdd6Vq!Wx|jrt|jjVqZWqWdS(u This is where the magic happens. We do our usually processing through the states and when we have a token to return we yield the token which pauses processing until the next token is requested. u ParseErrorutypeiudataN(udequeu tokenQueueustateustreamuerrorsu tokenTypesupopupopleft(uself((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyu__iter__9s * uHTMLTokenizer.__iter__c 0Cst}d}|r!t}d}ng}|jj}x8||krp|tk rp|j||jj}q9Wtdj||}|tkrt|}|j jit dd6dd6i|d6d 6nd |kod kns|d kr3d }|j jit dd6dd6i|d6d 6nsd|koJdknsd|kofdknsd|kodknsd|kodkns|t ddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d g#krQ|j jit dd6dd6i|d6d 6nyt |}WnBt k r|d8}t d |d?Bt d9|d:@B}YnX|d;kr|j jit dd6d<d6|jj|n|S(=uThis function returns either U+FFFD or the character based on the decimal or hexadecimal representation. It also discards ";" if present. If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked. i iuu ParseErrorutypeu$illegal-codepoint-for-numeric-entityudatau charAsIntudatavarsiiiu�iiiiiiiii iiiiiiiiiiiiiiiiiii i i i i i i i i i iiiiiiiiu;u numeric-entity-without-semicolon(udigitsu hexDigitsustreamucharuEOFuappenduintujoinureplacementCharactersu tokenQueueu tokenTypesu frozensetuchru ValueErroruunget( uselfuisHexualloweduradixu charStackucu charAsIntucharuv((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyuconsumeNumberEntityIs`              +  u!HTMLTokenizer.consumeNumberEntityc Csd}|jjg}|dtks]|dtddfks]|dk rt||dkrt|jj|dn|ddkrpd}|j|jj|ddkrd}|j|jjn|r|dt ks| r"|dt kr"|jj|d|j |}qD|j jit dd 6d d 6|jj|jdd j|}nxF|dtk rtjd j|sPn|j|jjqsWy2tjd j|dd}t|}Wntk rd}YnX|dk r|dd krG|j jit dd 6dd 6n|dd kr|r||tks||t ks||dkr|jj|jdd j|}qDt|}|jj|j|d j||d7}nK|j jit dd 6dd 6|jj|jdd j|}|rf|jd dd|7u ParseErroru'expected-tag-name-but-got-right-bracketu Charactersu<>u?u'expected-tag-name-but-got-question-markuexpected-tag-nameuu ParseErroru*expected-closing-tag-but-got-right-bracketu expected-closing-tag-but-got-eofu Charactersuu ParseErrorutypeueof-in-tag-nameudatau/uuinvalid-codepointunameu�T(ustreamucharuspaceCharactersubeforeAttributeNameStateustateuemitCurrentTokenuEOFu tokenQueueuappendu tokenTypesu dataStateuselfClosingStartTagStateu currentTokenuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyu tagNameStates"        uHTMLTokenizer.tagNameStatecCsu|jj}|dkr3d|_|j|_n>|jjitdd6dd6|jj||j |_dS(Nu/uu Charactersutypeu|jjitdd6dd6|jj ||j |_dS(Nu Charactersutypeuu Charactersu|jjitdd6dd6|jj||j |_dS(Nu/uu Charactersutypeu|jjitdd6dd6|jj ||j |_dS(Nu Charactersutypeuu Charactersu|jjitdd6dd6|jj ||j |_d S( Nu/uu!u Charactersutypeu|jjitdd6dd6|jj ||j |_dS(Nu Charactersutypeuu Charactersuuu ParseErroruinvalid-codepointu�T( ustreamucharu tokenQueueuappendu tokenTypesu"scriptDataEscapedLessThanSignStateustateuscriptDataStateuscriptDataEscapedStateuEOFu dataStateuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyuscriptDataEscapedDashDashStates& %  "    " u,HTMLTokenizer.scriptDataEscapedDashDashStatecCs|jj}|dkr3d|_|j|_n|tkr}|jjitdd6d|d6||_|j |_n>|jjitdd6dd6|jj ||j |_dS(Nu/uu Charactersutypeu|jjitdd6dd6|jj ||j |_dS(Nu Charactersutypeuu Charactersuu Charactersutypeudatauscript(u/u>T(ustreamucharuspaceCharactersu frozensetu tokenQueueuappendu tokenTypesutemporaryBufferuloweruscriptDataDoubleEscapedStateustateuscriptDataEscapedStateu asciiLettersuungetuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyu scriptDataDoubleEscapeStartStates" " u.HTMLTokenizer.scriptDataDoubleEscapeStartStatecCs?|jj}|dkrL|jjitdd6dd6|j|_n|dkr|jjitdd6dd6|j|_n|dkr|jjitdd6dd6|jjitdd6d d6n_|tkr|jjitdd6d d6|j |_n"|jjitdd6|d6d S( Nu-u Charactersutypeudatauuu ParseErroruinvalid-codepointu�ueof-in-script-in-scriptT( ustreamucharu tokenQueueuappendu tokenTypesu(scriptDataDoubleEscapedLessThanSignStateustateuscriptDataStateuscriptDataDoubleEscapedStateuEOFu dataStateuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyu$scriptDataDoubleEscapedDashDashState s, % " "     " u2HTMLTokenizer.scriptDataDoubleEscapedDashDashStatecCsu|jj}|dkrU|jjitdd6dd6d|_|j|_n|jj||j |_dS(Nu/u CharactersutypeudatauT( ustreamucharu tokenQueueuappendu tokenTypesutemporaryBufferuscriptDataDoubleEscapeEndStateustateuungetuscriptDataDoubleEscapedStateuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyu(scriptDataDoubleEscapedLessThanSignState9s "  u6HTMLTokenizer.scriptDataDoubleEscapedLessThanSignStatecCs|jj}|ttdBkrz|jjitdd6|d6|jjdkrk|j |_ q|j |_ n\|t kr|jjitdd6|d6|j|7_n|jj ||j |_ dS( Nu/u>u Charactersutypeudatauscript(u/u>T(ustreamucharuspaceCharactersu frozensetu tokenQueueuappendu tokenTypesutemporaryBufferuloweruscriptDataEscapedStateustateuscriptDataDoubleEscapedStateu asciiLettersuungetuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyuscriptDataDoubleEscapeEndStateDs" " u,HTMLTokenizer.scriptDataDoubleEscapeEndStatecCs|jj}|tkr1|jjtdnz|tkrf|jdj|dg|j|_ nE|dkr|j n,|dkr|j |_ n|dkr|j jit d d 6d d6|jdj|dg|j|_ n|d krH|j jit d d 6d d6|jdjddg|j|_ nc|tkr|j jit d d 6dd6|j|_ n&|jdj|dg|j|_ dS(Nudatauu>u/u'u"u=uu/uu ParseErrorutypeuinvalid-codepointu�u'u"uudatauu/uu ParseErrorutypeuinvalid-codepointu�u'u"uu ParseErrorutypeu.expected-attribute-value-but-got-right-bracketudatauuinvalid-codepointiu�u=u               u'HTMLTokenizer.beforeAttributeValueStatecCs|jj}|dkr*|j|_n|dkrF|jdn|dkr|jjitdd6dd6|jdd dd 7u"u'u=uu"u'u=u|jj it dd6dd6|jj ||j|_dS( Nu>u/u ParseErrorutypeu$unexpected-EOF-after-attribute-valueudatau*unexpected-character-after-attribute-valueT(ustreamucharuspaceCharactersubeforeAttributeNameStateustateuemitCurrentTokenuselfClosingStartTagStateuEOFu tokenQueueuappendu tokenTypesuungetu dataStateuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyuafterAttributeValueState*s"        u&HTMLTokenizer.afterAttributeValueStatecCs|jj}|dkr5d|jd<|jn|tkr|jjitdd6dd6|jj ||j |_ n>|jjitdd6dd6|jj ||j |_ dS( Nu>u selfClosingu ParseErrorutypeu#unexpected-EOF-after-solidus-in-tagudatau)unexpected-character-after-solidus-in-tagT( ustreamucharuTrueu currentTokenuemitCurrentTokenuEOFu tokenQueueuappendu tokenTypesuungetu dataStateustateubeforeAttributeNameState(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyuselfClosingStartTagState>s       u&HTMLTokenizer.selfClosingStartTagStatecCsc|jjd}|jdd}|jjitdd6|d6|jj|j|_dS(Nu>uu�uCommentutypeudataT( ustreamu charsUntilureplaceu tokenQueueuappendu tokenTypesucharu dataStateustateuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyubogusCommentStatePs   uHTMLTokenizer.bogusCommentStatec Cs0|jjg}|ddkrv|j|jj|ddkritdd6dd6|_|j|_d Sne|d!d"krd }x>d)D]6}|j|jj|d*|krd+}PqqW|ritdd6dd6dd6dd6d d6|_|j |_d Sn|d,dkr|j dk r|j j j r|j j j d-j|j j jkrd }xPd dddddgD]6}|j|jj|d.|krd+}PqqW|r|j|_d Sn|jjitdd6dd6x |r|jj|jqW|j|_d S(/Niu-uCommentutypeuudatauduDuouOucuCutuTuyuYupuPueuEuDoctypeunameupublicIdusystemIducorrectu[uAu ParseErroruexpected-dashes-or-doctypeiiTi(uduD(uouO(ucuC(utuT(uyuY(upuP(ueuE((uouO(ucuC(utuT(uyuY(upuP(ueuEiFiii(ustreamucharuappendu tokenTypesu currentTokenucommentStartStateustateuTrueuFalseuNoneu doctypeStateuparserutreeu openElementsu namespaceudefaultNamespaceucdataSectionStateu tokenQueueuungetupopubogusCommentState(uselfu charStackumatcheduexpected((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyumarkupDeclarationOpenState_sR    %    u(HTMLTokenizer.markupDeclarationOpenStatecCs-|jj}|dkr*|j|_n|dkrl|jjitdd6dd6|jdd7uincorrect-commentueof-in-commentT( ustreamucharucommentStartDashStateustateu tokenQueueuappendu tokenTypesu currentTokenu dataStateuEOFu commentStateuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyucommentStartStates(        uHTMLTokenizer.commentStartStatecCs1|jj}|dkr*|j|_n|dkrl|jjitdd6dd6|jdd7uincorrect-commentueof-in-commentT( ustreamucharucommentEndStateustateu tokenQueueuappendu tokenTypesu currentTokenu dataStateuEOFu commentStateuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyucommentStartDashStates(        u#HTMLTokenizer.commentStartDashStatecCs|jj}|dkr*|j|_n|dkrl|jjitdd6dd6|jdd7uu ParseErrorutypeuinvalid-codepointudatau--�u!u,unexpected-bang-after-double-dash-in-commentu-u,unexpected-dash-after-double-dash-in-commentueof-in-comment-double-dashuunexpected-char-in-commentu--T( ustreamucharu tokenQueueuappendu currentTokenu dataStateustateu tokenTypesu commentStateucommentEndBangStateuEOFuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyucommentEndStates6           uHTMLTokenizer.commentEndStatecCs,|jj}|dkr=|jj|j|j|_n|dkri|jdd7<|j|_n|dkr|jjitdd6dd6|jdd 7<|j |_nq|t kr|jjitdd6d d6|jj|j|j|_n!|jdd|7<|j |_d S( Nu>u-udatau--!uu ParseErrorutypeuinvalid-codepointu--!�ueof-in-comment-end-bang-stateT( ustreamucharu tokenQueueuappendu currentTokenu dataStateustateucommentEndDashStateu tokenTypesu commentStateuEOFuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyucommentEndBangStates(       u!HTMLTokenizer.commentEndBangStatecCs|jj}|tkr*|j|_n|tkr|jjitdd6dd6d|j d<|jj|j |j |_n>|jjitdd6dd6|jj ||j|_dS( Nu ParseErrorutypeu!expected-doctype-name-but-got-eofudataucorrectuneed-space-after-doctypeFT(ustreamucharuspaceCharactersubeforeDoctypeNameStateustateuEOFu tokenQueueuappendu tokenTypesuFalseu currentTokenu dataStateuungetuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyu doctypeStates      uHTMLTokenizer.doctypeStatecCs?|jj}|tkrn|dkr{|jjitdd6dd6d |jd<|jj|j|j|_ n|dkr|jjitdd6dd6d |jd <|j |_ nv|t kr"|jjitdd6d d6d |jd<|jj|j|j|_ n||jd <|j |_ d S(Nu>u ParseErrorutypeu+expected-doctype-name-but-got-right-bracketudataucorrectuuinvalid-codepointu�unameu!expected-doctype-name-but-got-eofFT( ustreamucharuspaceCharactersu tokenQueueuappendu tokenTypesuFalseu currentTokenu dataStateustateudoctypeNameStateuEOFuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyubeforeDoctypeNameState&s.            u$HTMLTokenizer.beforeDoctypeNameStatecCso|jj}|tkrG|jdjt|jd<|j|_n$|dkr|jdjt|jd<|jj |j|j |_n|dkr|jj it dd6dd6|jdd7<|j |_n|t krZ|jj it dd6d d6d |jd <|jdjt|jd<|jj |j|j |_n|jd|7uu ParseErrorutypeuinvalid-codepointudatau�ueof-in-doctype-nameucorrectFT(ustreamucharuspaceCharactersu currentTokenu translateuasciiUpper2LoweruafterDoctypeNameStateustateu tokenQueueuappendu dataStateu tokenTypesudoctypeNameStateuEOFuFalseuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyudoctypeNameState@s,       uHTMLTokenizer.doctypeNameStatecCs|jj}|tkrn|dkrL|jj|j|j|_n|tkrd|jd<|jj ||jjit dd6dd6|jj|j|j|_n|d krd!}x3d'D]+}|jj}||krd}PqqW|r{|j |_d!Sna|d(kr{d!}x3d.D]+}|jj}||kr3d}Pq3q3W|r{|j|_d!Sn|jj ||jjit dd6dd6i|d6d6d|jd<|j|_d!S(/Nu>ucorrectu ParseErrorutypeueof-in-doctypeudataupuPuuuUubuBuluLuiuIucuCusuSuyuYutuTueuEumuMu*expected-space-or-right-bracket-in-doctypeudatavarsF(upuPT(uuuU(ubuB(uluL(uiuI(ucuC((uuuU(ubuB(uluL(uiuI(ucuC(usuS(uyuY(usuS(utuT(ueuE(umuM((uyuY(usuS(utuT(ueuE(umuM(ustreamucharuspaceCharactersu tokenQueueuappendu currentTokenu dataStateustateuEOFuFalseuungetu tokenTypesuTrueuafterDoctypePublicKeywordStateuafterDoctypeSystemKeywordStateubogusDoctypeState(uselfudataumatcheduexpected((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyuafterDoctypeNameStateYsT               u#HTMLTokenizer.afterDoctypeNameStatecCs|jj}|tkr*|j|_n|d krw|jjitdd6dd6|jj||j|_ny|t kr|jjitdd6dd6d |j d<|jj|j |j |_n|jj||j|_d S( Nu'u"u ParseErrorutypeuunexpected-char-in-doctypeudataueof-in-doctypeucorrect(u'u"FT(ustreamucharuspaceCharactersu"beforeDoctypePublicIdentifierStateustateu tokenQueueuappendu tokenTypesuungetuEOFuFalseu currentTokenu dataStateuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyuafterDoctypePublicKeywordStates"       u,HTMLTokenizer.afterDoctypePublicKeywordStatecCsg|jj}|tkrnE|dkrFd|jd<|j|_n|dkrnd|jd<|j|_n|dkr|jjit dd6dd 6d |jd <|jj|j|j |_n|t kr(|jjit dd6d d 6d |jd <|jj|j|j |_n;|jjit dd6d d 6d |jd <|j |_dS(Nu"uupublicIdu'u>u ParseErrorutypeuunexpected-end-of-doctypeudataucorrectueof-in-doctypeuunexpected-char-in-doctypeFT(ustreamucharuspaceCharactersu currentTokenu(doctypePublicIdentifierDoubleQuotedStateustateu(doctypePublicIdentifierSingleQuotedStateu tokenQueueuappendu tokenTypesuFalseu dataStateuEOFubogusDoctypeStateuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyu"beforeDoctypePublicIdentifierStates4              u0HTMLTokenizer.beforeDoctypePublicIdentifierStatecCs;|jj}|dkr*|j|_n |dkrl|jjitdd6dd6|jdd7uunexpected-end-of-doctypeucorrectueof-in-doctypeFT( ustreamucharu!afterDoctypePublicIdentifierStateustateu tokenQueueuappendu tokenTypesu currentTokenuFalseu dataStateuEOFuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyu(doctypePublicIdentifierDoubleQuotedStates*         u6HTMLTokenizer.doctypePublicIdentifierDoubleQuotedStatecCs;|jj}|dkr*|j|_n |dkrl|jjitdd6dd6|jdd7uunexpected-end-of-doctypeucorrectueof-in-doctypeFT( ustreamucharu!afterDoctypePublicIdentifierStateustateu tokenQueueuappendu tokenTypesu currentTokenuFalseu dataStateuEOFuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyu(doctypePublicIdentifierSingleQuotedStates*         u6HTMLTokenizer.doctypePublicIdentifierSingleQuotedStatecCs|jj}|tkr*|j|_nZ|dkrX|jj|j|j|_n,|dkr|jjit dd6dd6d|jd<|j |_n|d kr|jjit dd6dd6d|jd<|j |_n|t krI|jjit dd6d d6d |jd <|jj|j|j|_n;|jjit dd6dd6d |jd <|j|_d S(Nu>u"u ParseErrorutypeuunexpected-char-in-doctypeudatauusystemIdu'ueof-in-doctypeucorrectFT(ustreamucharuspaceCharactersu-betweenDoctypePublicAndSystemIdentifiersStateustateu tokenQueueuappendu currentTokenu dataStateu tokenTypesu(doctypeSystemIdentifierDoubleQuotedStateu(doctypeSystemIdentifierSingleQuotedStateuEOFuFalseubogusDoctypeStateuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyu!afterDoctypePublicIdentifierStates6              u/HTMLTokenizer.afterDoctypePublicIdentifierStatecCs8|jj}|tkrn|dkrL|jj|j|j|_n|dkrtd|jd<|j|_n|dkrd|jd<|j |_n|t kr|jjit dd6dd 6d |jd <|jj|j|j|_n;|jjit dd6d d 6d |jd <|j |_d S(Nu>u"uusystemIdu'u ParseErrorutypeueof-in-doctypeudataucorrectuunexpected-char-in-doctypeFT(ustreamucharuspaceCharactersu tokenQueueuappendu currentTokenu dataStateustateu(doctypeSystemIdentifierDoubleQuotedStateu(doctypeSystemIdentifierSingleQuotedStateuEOFu tokenTypesuFalseubogusDoctypeStateuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyu-betweenDoctypePublicAndSystemIdentifiersState s.            u;HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersStatecCs|jj}|tkr*|j|_n|d krw|jjitdd6dd6|jj||j|_ny|t kr|jjitdd6dd6d |j d<|jj|j |j |_n|jj||j|_d S( Nu'u"u ParseErrorutypeuunexpected-char-in-doctypeudataueof-in-doctypeucorrect(u'u"FT(ustreamucharuspaceCharactersu"beforeDoctypeSystemIdentifierStateustateu tokenQueueuappendu tokenTypesuungetuEOFuFalseu currentTokenu dataStateuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyuafterDoctypeSystemKeywordState%s"       u,HTMLTokenizer.afterDoctypeSystemKeywordStatecCsg|jj}|tkrnE|dkrFd|jd<|j|_n|dkrnd|jd<|j|_n|dkr|jjit dd6dd 6d |jd <|jj|j|j |_n|t kr(|jjit dd6d d 6d |jd <|jj|j|j |_n;|jjit dd6dd 6d |jd <|j |_d S(Nu"uusystemIdu'u>u ParseErrorutypeuunexpected-char-in-doctypeudataucorrectueof-in-doctypeFT(ustreamucharuspaceCharactersu currentTokenu(doctypeSystemIdentifierDoubleQuotedStateustateu(doctypeSystemIdentifierSingleQuotedStateu tokenQueueuappendu tokenTypesuFalseu dataStateuEOFubogusDoctypeStateuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyu"beforeDoctypeSystemIdentifierState9s4              u0HTMLTokenizer.beforeDoctypeSystemIdentifierStatecCs;|jj}|dkr*|j|_n |dkrl|jjitdd6dd6|jdd7uunexpected-end-of-doctypeucorrectueof-in-doctypeFT( ustreamucharu!afterDoctypeSystemIdentifierStateustateu tokenQueueuappendu tokenTypesu currentTokenuFalseu dataStateuEOFuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyu(doctypeSystemIdentifierDoubleQuotedStateVs*         u6HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedStatecCs;|jj}|dkr*|j|_n |dkrl|jjitdd6dd6|jdd7uunexpected-end-of-doctypeucorrectueof-in-doctypeFT( ustreamucharu!afterDoctypeSystemIdentifierStateustateu tokenQueueuappendu tokenTypesu currentTokenuFalseu dataStateuEOFuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyu(doctypeSystemIdentifierSingleQuotedStatens*         u6HTMLTokenizer.doctypeSystemIdentifierSingleQuotedStatecCs|jj}|tkrn|dkrL|jj|j|j|_n|tkr|jjit dd6dd6d|jd<|jj|j|j|_n.|jjit dd6dd6|j |_d S( Nu>u ParseErrorutypeueof-in-doctypeudataucorrectuunexpected-char-in-doctypeFT( ustreamucharuspaceCharactersu tokenQueueuappendu currentTokenu dataStateustateuEOFu tokenTypesuFalseubogusDoctypeStateuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyu!afterDoctypeSystemIdentifierStates        u/HTMLTokenizer.afterDoctypeSystemIdentifierStatecCs|jj}|dkr=|jj|j|j|_n>|tkr{|jj||jj|j|j|_ndS(Nu>T( ustreamucharu tokenQueueuappendu currentTokenu dataStateustateuEOFuungetuTrue(uselfudata((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyubogusDoctypeStates  uHTMLTokenizer.bogusDoctypeStatecCseg}x|j|jjd|j|jjd|jj}|tkrZPq |dkslt|ddddkr|ddd|diiu]]uuiu ParseErrorutypeuinvalid-codepointudatau�u CharactersiiiiiT(uappendustreamu charsUntilucharuEOFuAssertionErrorujoinucounturangeu tokenQueueu tokenTypesureplaceu dataStateustateuTrue(uselfudataucharu nullCountui((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyucdataSectionStates0   uHTMLTokenizer.cdataSectionStateNTF(Pu__name__u __module__u __qualname__u__doc__uNoneuTrueu__init__u__iter__uconsumeNumberEntityuFalseu consumeEntityuprocessEntityInAttributeuemitCurrentTokenu dataStateuentityDataStateu rcdataStateucharacterReferenceInRcdatau rawtextStateuscriptDataStateuplaintextStateu tagOpenStateucloseTagOpenStateu tagNameStateurcdataLessThanSignStateurcdataEndTagOpenStateurcdataEndTagNameStateurawtextLessThanSignStateurawtextEndTagOpenStateurawtextEndTagNameStateuscriptDataLessThanSignStateuscriptDataEndTagOpenStateuscriptDataEndTagNameStateuscriptDataEscapeStartStateuscriptDataEscapeStartDashStateuscriptDataEscapedStateuscriptDataEscapedDashStateuscriptDataEscapedDashDashStateu"scriptDataEscapedLessThanSignStateu scriptDataEscapedEndTagOpenStateu scriptDataEscapedEndTagNameStateu scriptDataDoubleEscapeStartStateuscriptDataDoubleEscapedStateu scriptDataDoubleEscapedDashStateu$scriptDataDoubleEscapedDashDashStateu(scriptDataDoubleEscapedLessThanSignStateuscriptDataDoubleEscapeEndStateubeforeAttributeNameStateuattributeNameStateuafterAttributeNameStateubeforeAttributeValueStateuattributeValueDoubleQuotedStateuattributeValueSingleQuotedStateuattributeValueUnQuotedStateuafterAttributeValueStateuselfClosingStartTagStateubogusCommentStateumarkupDeclarationOpenStateucommentStartStateucommentStartDashStateu commentStateucommentEndDashStateucommentEndStateucommentEndBangStateu doctypeStateubeforeDoctypeNameStateudoctypeNameStateuafterDoctypeNameStateuafterDoctypePublicKeywordStateu"beforeDoctypePublicIdentifierStateu(doctypePublicIdentifierDoubleQuotedStateu(doctypePublicIdentifierSingleQuotedStateu!afterDoctypePublicIdentifierStateu-betweenDoctypePublicAndSystemIdentifiersStateuafterDoctypeSystemKeywordStateu"beforeDoctypeSystemIdentifierStateu(doctypeSystemIdentifierDoubleQuotedStateu(doctypeSystemIdentifierSingleQuotedStateu!afterDoctypeSystemIdentifierStateubogusDoctypeStateucdataSectionState(u __locals__((u __class__u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyu HTMLTokenizers    HP          #                  7 "       -          3            u HTMLTokenizerN(u __future__uabsolute_importudivisionuunicode_literalsuunichruchru NameErroru collectionsudequeu constantsuspaceCharactersuentitiesu asciiLettersuasciiUpper2Lowerudigitsu hexDigitsuEOFu tokenTypesu tagTokenTypesureplacementCharactersu inputstreamuHTMLInputStreamutrieuTrieu entitiesTrieuobjectu HTMLTokenizer(((u7/tmp/pip-zej_zi-build/pip/_vendor/html5lib/tokenizer.pyus