ó zfc@sdZddlZddlZejdƒZejdƒZejdƒZejdƒZejdƒZejdƒZ ejd ƒZ ejd ƒZ ejd ƒZ ejd ƒZ ejd ejƒZejdƒZejdƒZdefd„ƒYZdejfd„ƒYZdS(sA parser for HTML and XHTML.iÿÿÿÿNs[&<]s &[a-zA-Z#]s%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]s)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]s <[a-zA-Z]t>s--\s*>s$([a-zA-Z][^ />]*)(?:\s|/(?!>))*s[a-zA-Z][^ />]*s]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*s <[a-zA-Z][^\t\n\r\f />\x00]* # tag name (?:[\s/]* # optional whitespace before attribute name (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name (?:\s*=+\s* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\s]* # bare value ) )?(?:\s|/(?!>))* )* )? \s* # trailing whitespace s#tHTMLParseErrorcBs#eZdZdd„Zd„ZRS(s&Exception raised for all parse errors.cCs'||_|d|_|d|_dS(Nii(tmsgtlinenotoffset(tselfRtposition((s"/usr/lib64/python2.7/HTMLParser.pyt__init__<s  cCsW|j}|jdk r,|d|j}n|jdk rS|d|jd}n|S(Ns , at line %ds , column %di(RRtNoneR(Rtresult((s"/usr/lib64/python2.7/HTMLParser.pyt__str__Bs  N(NN(t__name__t __module__t__doc__RRR (((s"/usr/lib64/python2.7/HTMLParser.pyR9s t HTMLParsercBs eZdZdZd„Zd„Zd„Zd„Zd„ZdZ d„Z d „Z d „Z d „Zd „Zd d„Zd„Zd„Zd„Zd„Zd„Zd„Zd„Zd„Zd„Zd„Zd„Zd„Zd„Zd„ZdZd„Z RS( sÇFind tags and other markup and call handler functions. Usage: p = HTMLParser() p.feed(data) ... p.close() Start tags are handled by calling self.handle_starttag() or self.handle_startendtag(); end tags by self.handle_endtag(). The data between tags is passed from the parser to the derived class by calling self.handle_data() with the data as argument (the data may be split up in arbitrary chunks). Entity references are passed by calling self.handle_entityref() with the entity reference as the argument. Numeric character references are passed to self.handle_charref() with the string containing the reference as the argument. tscripttstylecCs|jƒdS(s#Initialize and reset this instance.N(treset(R((s"/usr/lib64/python2.7/HTMLParser.pyRbscCs8d|_d|_t|_d|_tjj|ƒdS(s1Reset this instance. Loses all unprocessed data.ts???N( trawdatatlasttagtinteresting_normalt interestingRt cdata_elemt markupbaset ParserBaseR(R((s"/usr/lib64/python2.7/HTMLParser.pyRfs     cCs!|j||_|jdƒdS(s‘Feed data to the parser. Call this as often as you want, with as little or as much text as you want (may include '\n'). iN(Rtgoahead(Rtdata((s"/usr/lib64/python2.7/HTMLParser.pytfeednscCs|jdƒdS(sHandle any buffered data.iN(R(R((s"/usr/lib64/python2.7/HTMLParser.pytclosewscCst||jƒƒ‚dS(N(Rtgetpos(Rtmessage((s"/usr/lib64/python2.7/HTMLParser.pyterror{scCs|jS(s)Return full source of start tag: '<...>'.(t_HTMLParser__starttag_text(R((s"/usr/lib64/python2.7/HTMLParser.pytget_starttag_text€scCs2|jƒ|_tjd|jtjƒ|_dS(Ns (tlowerRtretcompiletIR(Rtelem((s"/usr/lib64/python2.7/HTMLParser.pytset_cdata_mode„scCst|_d|_dS(N(RRRR(R((s"/usr/lib64/python2.7/HTMLParser.pytclear_cdata_modeˆs c Csj|j}d}t|ƒ}xö||kr|jj||ƒ}|rT|jƒ}n|jraPn|}||krŠ|j|||!ƒn|j||ƒ}||kr¬Pn|j}|d|ƒr7t j ||ƒrè|j |ƒ}n¯|d|ƒr |j |ƒ}nŽ|d|ƒr*|j |ƒ}nm|d|ƒrK|j|ƒ}nL|d|ƒrl|j|ƒ}n+|d|kr–|jdƒ|d}nP|dkr"|s­Pn|jd|dƒ}|dkr|jd|dƒ}|dkr |d}q n |d7}|j|||!ƒn|j||ƒ}q|d |ƒrtj ||ƒ}|rÂ|jƒd d !} |j| ƒ|jƒ}|d |dƒsª|d}n|j||ƒ}qqd ||kr|j|||d !ƒ|j||d ƒ}nPq|d |ƒrtj ||ƒ}|rŽ|jdƒ} |j| ƒ|jƒ}|d |dƒsv|d}n|j||ƒ}qntj ||ƒ}|rÖ|rÒ|jƒ||krÒ|jdƒnPq|d|kr |jd ƒ|j||dƒ}qPqqW|rY||krY|j rY|j|||!ƒ|j||ƒ}n|||_dS(NitRtitnR4tjR2tktname((s"/usr/lib64/python2.7/HTMLParser.pyRs                    cCsì|j}|||d!dkr0|jdƒn|||d!dkrT|j|ƒS|||d!dkrx|j|ƒS|||d!jƒd krÛ|jd |dƒ}|d kr»d S|j||d|!ƒ|d S|j|ƒSdS( Nis(RRBRR4RD((s"/usr/lib64/python2.7/HTMLParser.pyR8s   cCsnd|_|j|ƒ}|dkr(|S|j}|||!|_g}tj||dƒ}|jƒ}|jdƒjƒ|_ }xî||krut j||ƒ}|s°Pn|jdddƒ\} } } | sÝd} nX| d dkoü| dkns%| d dko | dknr5| dd!} n| rM|j | ƒ} n|j | jƒ| fƒ|jƒ}qˆW|||!j ƒ} | d kr|jƒ\} }d |jkrî| |jjd ƒ} t|jƒ|jjd ƒ}n|t|jƒ}|j|||!ƒ|S| jd ƒr;|j||ƒn/|j||ƒ||jkrj|j|ƒn|S( Niiiis'iÿÿÿÿt"Rs/>s (Rs/>(RR!tcheck_for_whole_start_tagRttagfindR4R>R<R#RtattrfindtunescapetappendtstripRtcountR-trfindR0tendswiththandle_startendtagthandle_starttagtCDATA_CONTENT_ELEMENTSR((RRBtendposRtattrsR4REttagtmtattrnametrestt attrvalueR>RR((s"/usr/lib64/python2.7/HTMLParser.pyR5sP     $$  cCsý|j}tj||ƒ}|rí|jƒ}|||d!}|dkrR|dS|dkr²|jd|ƒrx|dS|jd|ƒrŽdS|j||dƒ|jdƒn|dkrÂdS|d krÒdS||krâ|S|dSntd ƒ‚dS( NiRt/s/>iiÿÿÿÿsmalformed empty start tagRs6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZswe should not get here!(RtlocatestarttagendR4R>R2R1R tAssertionError(RRBRR`RDtnext((s"/usr/lib64/python2.7/HTMLParser.pyRQNs,        cCsj|j}tj||dƒ}|s)dS|jƒ}tj||ƒ}|s|jdk rt|j|||!ƒ|St j||dƒ}|s¿|||d!dkr¯|dS|j |ƒSn|j dƒj ƒ}|j d|jƒƒ}|j|ƒ|dS|j dƒj ƒ}|jdk rO||jkrO|j|||!ƒ|Sn|j|ƒ|jƒ|S(NiiÿÿÿÿiisR(Rt endendtagR.R>t endtagfindR4RRR0RRRIR<R#R:t handle_endtagR)(RRBRR4RJt namematchttagnameR'((s"/usr/lib64/python2.7/HTMLParser.pyR6ns6     cCs!|j||ƒ|j|ƒdS(N(R[Rj(RR_R^((s"/usr/lib64/python2.7/HTMLParser.pyRZ–scCsdS(N((RR_R^((s"/usr/lib64/python2.7/HTMLParser.pyR[›scCsdS(N((RR_((s"/usr/lib64/python2.7/HTMLParser.pyRjŸscCsdS(N((RRF((s"/usr/lib64/python2.7/HTMLParser.pyR=£scCsdS(N((RRF((s"/usr/lib64/python2.7/HTMLParser.pyR@§scCsdS(N((RR((s"/usr/lib64/python2.7/HTMLParser.pyR0«scCsdS(N((RR((s"/usr/lib64/python2.7/HTMLParser.pyRK¯scCsdS(N((Rtdecl((s"/usr/lib64/python2.7/HTMLParser.pyRH³scCsdS(N((RR((s"/usr/lib64/python2.7/HTMLParser.pyRO·scCsdS(N((RR((s"/usr/lib64/python2.7/HTMLParser.pyt unknown_declºscs2d|kr|S‡fd†}tjd||ƒS(NR,cs|jƒd}yZ|ddkri|d}|dd krSt|ddƒ}n t|ƒ}t|ƒSWntk r†d|dSXtjdkrëd dl}id d 6}x-|jj ƒD]\}}t|ƒ||s&