U e'@sTdZdddgZddlZddlmZmZz0ddlmZmZm Z m Z m Z m Z m Z e e fZWn8ek rddlmZmZm Z m Z m Z m Z e ZYnXdd dZdd dZdd dZd d ZedejjZGdddZddZddZzddlmZWn"ek rddlmZYnXedjZze Wne!k rFe"Z YnXddZ#dS)z5External interface to the BeautifulSoup HTML parser. fromstringparse convert_treeN)etreehtml) BeautifulSoupTagCommentProcessingInstructionNavigableString DeclarationDoctype)rrr r r r cKst|||f|S)aParse a string of HTML data into an Element tree using the BeautifulSoup parser. Returns the root ```` Element of the tree. You can pass a different BeautifulSoup parser through the `beautifulsoup` keyword, and a diffent Element factory function through the `makeelement` keyword. By default, the standard ``BeautifulSoup`` class and the default factory of `lxml.html` are used. )_parse)data beautifulsoup makeelementbsargsrD/opt/hc_python/lib64/python3.8/site-packages/lxml/html/soupparser.pyrs cKs,t|dst|}t|||f|}t|S)aYParse a file into an ElemenTree using the BeautifulSoup parser. You can pass a different BeautifulSoup parser through the `beautifulsoup` keyword, and a diffent Element factory function through the `makeelement` keyword. By default, the standard ``BeautifulSoup`` class and the default factory of `lxml.html` are used. read)hasattropenrrZ ElementTree)filerrrrootrrrr$s cCs*t||}|}|D]}||q|S)aConvert a BeautifulSoup tree to a list of Element trees. Returns a list instead of a single root Element to support HTML-like soup with more than one root element. You can pass a different Element factory through the `makeelement` keyword. ) _convert_treeZ getchildrenremove)beautiful_soup_treerrchildrenchildrrrr3s  cKs|dkr t}t|dr&d|kr&d|d<t|dr@d|kr@d|d<||f|}t||}t|dkrx|djdkrx|dSd|_|S) NZ HTML_ENTITIESZconvertEntitiesrZDEFAULT_BUILDER_FEATURESfeaturesz html.parserr)rrrlentag)sourcerrrtreerrrrrEs    rz`(?:\s|[|\}}| j}|o|dd|_|o|dd|_| S)Nrr )r html_parserr enumerate isinstancerr&lower_DECLARATION_OR_DOCTYPEr(indexr%_init_node_convertersreversedZ addpreviousZaddnextZ output_readyAttributeErrorstring_parse_doctype_declarationgroupsZ getroottreedocinfo public_idZ system_url)rrZfirst_element_idxZlast_element_idxZ html_rootZ declarationieZpre_rootZ post_rootroots convert_nodeZres_rootprevZ convertedZdoctype_stringmatchZ external_idZsys_urir=rrrrisd   &          rcsigfdd}fdddfdd ddd d |ttfd d }|td d}|tdd}|tfdd}S)Ncsfdd}|S)Ncs D]}||<|q|Sr+)append)handlert) convertersordered_node_typestypesrradds z5_init_node_converters..converter..addr)rJrKrHrI)rJr convertersz(_init_node_converters..convertercs$D]}t||r|SqdSr+)r3)noderGrLrrfind_best_converters z2_init_node_converters..find_best_convertercsPzt|}Wn(tk r8|}t|<YnX|dkrFdS|||Sr+)typeKeyError)bs_nodeparentrF)rHrOrrrBsz+_init_node_converters..convert_nodecSsTt|trBi}|D](\}}t|tr2d|}t|||<qndd|D}|S)N cSsi|]\}}|t|qSrunescape).0kvrrr sz<_init_node_converters..map_attrs..)r3dictitemslistjoinrV)Zbs_attrsattribsrXrYrrr map_attrss   z(_init_node_converters..map_attrscSs:t|dkr|jpd||_n|djp*d||d_dS)Nrr0)r!texttail)rSrbrrr append_texts z*_init_node_converters..append_textc s|j}|dk r2|r|nd}tj||j|d}n|r>|ni}|j|d}|D]H}zt|}Wntk r|YnX|dk rT|||qT||qT|S)N)Zattrib)r'rZ SubElementr&rPrQ)rRrSr'r_resrrF)rBrHrr`rr convert_tags   z*_init_node_converters..convert_tagcSs t|}|dk r|||Sr+)rZ HtmlCommentrErRrSrerrrconvert_comments  z._init_node_converters..convert_commentcSs>|dr|dd}tj|dd}|dk r:|||S)N?r0rTr )endswithrr splitrErgrrr convert_pi s    z)_init_node_converters..convert_pics|dk r|t|dSr+rU)rRrS)rdrr convert_textsz+_init_node_converters..convert_text)N)rr%r r r )rrMrfrhrlrmr)rdrBrHrOrr`rIrr7s     r7)name2codepointz&(\w+);cCs|sdSdd}t||S)NracSs8ztt|dWStk r2|dYSXdS)Nr r)unichrrngrouprQ)mrrrunescape_entity5sz!unescape..unescape_entity)handle_entities)r:rrrrrrV1srV)NN)NN)N)$__doc____all__reZlxmlrrZbs4rrr r r r r r5 ImportErrorrrrrcompile IGNORECASErDr;r%rr7 html.entitiesrnhtmlentitydefssubrsro NameErrorchrrVrrrrs< $       Uc