Yf4@sddZddlZddlZdgZGdddZGdddZGdddZdS) a% robotparser.py Copyright (C) 2000 Bastian Kleineidam You can choose between two licenses when using this package: 1) GNU GPLv2 2) PSF license for Python 2.2 The robots.txt Exclusion Protocol is implemented as specified in http://www.robotstxt.org/norobots-rfc.txt NRobotFileParserc@seZdZdZdddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ dS)rzs This class provides a set of methods to read, parse and answer questions about a single robots.txt file. cCs>g|_d|_d|_d|_|j|d|_dS)NFr)entries default_entry disallow_all allow_allset_url last_checked)selfurlr 7/opt/alt/python35/lib64/python3.5/urllib/robotparser.py__init__s      zRobotFileParser.__init__cCs|jS)zReturns the time the robots.txt file was last fetched. This is useful for long-running web spiders that need to check for new robots.txt files periodically. )r )r r r r mtimeszRobotFileParser.mtimecCsddl}|j|_dS)zYSets the time the robots.txt file was last fetched to the current time. rN)timer )r rr r r modified(s zRobotFileParser.modifiedcCs5||_tjj|dd\|_|_dS)z,Sets the URL referring to a robots.txt file.N)r urllibparseurlparsehostpath)r r r r r r0s zRobotFileParser.set_urlcCsytjj|j}Wnmtjjk r}zG|jdkrOd|_n'|jdkrv|jdkrvd|_WYdd}~Xn)X|j }|j |j dj dS) z4Reads the robots.txt URL and feeds it to the parser.TiiNzutf-8)rr) rZrequestZurlopenr errorZ HTTPErrorcoderrreadrdecode splitlines)r ferrrawr r r r5s  zRobotFileParser.readcCs>d|jkr*|jdkr:||_n|jj|dS)N*) useragentsrrappend)r entryr r r _add_entryBs zRobotFileParser._add_entrycCsd}t}|jx|D]}|sr|dkrJt}d}n(|dkrr|j|t}d}|jd}|dkr|d|}|j}|sq |jdd}t|dkr |djj|ds z+RobotFileParser.__str__..)joinr)r r r r __str__szRobotFileParser.__str__N) __name__ __module__ __qualname____doc__rrrrrr'rr>rDr r r r rs    4 c@s:eZdZdZddZddZddZdS) r3zoA rule line is a single "Allow:" (allowance==True) or "Disallow:" (allowance==False) followed by a path.cCsY|dkr| rd}tjjtjj|}tjj||_||_dS)NrT)rrr9rr:rr<)r rr<r r r rs zRuleLine.__init__cCs|jdkp|j|jS)Nr#)r startswith)r filenamer r r r;szRuleLine.applies_tocCs|jrdndd|jS)NZAllowZDisallowz: )r<r)r r r r rDszRuleLine.__str__N)rErFrGrHrr;rDr r r r r3s   r3c@sFeZdZdZddZddZddZdd Zd S) r+z?An entry has one or more user-agents and zero or more rulelinescCsg|_g|_dS)N)r$r2)r r r r rs zEntry.__init__cCsjg}x'|jD]}|jd|dgqWx*|jD]}|jt|dgq:Wdj|S)Nz User-agent: r?r)r$extendr2r@rC)r Zretagentr6r r r rDs z Entry.__str__cCs]|jddj}x=|jD]2}|dkr9dS|j}||kr#dSq#WdS)z2check if this entry applies to the specified agentr8rr#TF)r.r0r$)r r=rLr r r r;s   zEntry.applies_tocCs.x'|jD]}|j|r |jSq WdS)zZPreconditions: - our agent applies to this entry - filename is URL decodedT)r2r;r<)r rJr6r r r r<s zEntry.allowanceN)rErFrGrHrrDr;r<r r r r r+s    r+)rHZ urllib.parserZurllib.request__all__rr3r+r r r r  s