
    9i                         S SK JrJrJr  S SKJr   S SKJr  S SKJ	r
Jr  \
\l	        \\l        S/r " S S\5      r " S S\5      r " S	 S
\5      rg)    )absolute_importdivisionunicode_literals)str)urllib)parserequestRobotFileParserc                   R    \ rS rSrSrSS jrS rS rS rS r	S r
S	 rS
 rS rSrg)r
      zjThis class provides a set of methods to read, parse and answer
questions about a single robots.txt file.

c                 l    / U l         S U l        SU l        SU l        U R	                  U5        SU l        g )NFr   )entriesdefault_entrydisallow_all	allow_allset_urllast_checkedselfurls     c/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/future/backports/urllib/robotparser.py__init__RobotFileParser.__init__   s4    !!S    c                     U R                   $ )zReturns the time the robots.txt file was last fetched.

This is useful for long-running web spiders that need to
check for new robots.txt files periodically.

)r   r   s    r   mtimeRobotFileParser.mtime&   s        r   c                 6    SSK nUR                  5       U l        g)zISets the time the robots.txt file was last fetched to the
current time.

r   N)timer   )r   r    s     r   modifiedRobotFileParser.modified/   s    
 	 IIKr   c                 n    Xl         [        R                  R                  U5      SS u  U l        U l        g)z,Sets the URL referring to a robots.txt file.      N)r   r   r   urlparsehostpathr   s     r   r   RobotFileParser.set_url7   s+    %||44S9!A>	49r   c                     [         R                  R                  U R                  5      nUR	                  5       nU R                  UR                  S5      R                  5       5        g! [         R                  R                   aB  nUR                  S;   a  SU l         SnAgUR                  S:  a  SU l         SnAg SnAgSnAff = f)z4Reads the robots.txt URL and feeds it to the parser.zutf-8)i  i  Ti  N)r   r	   urlopenr   readr   decode
splitlineserror	HTTPErrorcoder   r   )r   frawerrs       r   r,   RobotFileParser.read<   s    		9&&txx0A &&(CJJszz'*5578 ||%% 	&xx:%$(!!S!% !	&s   )A* *C
C$CC
c                     SUR                   ;   a  U R                  c  Xl        g g U R                  R                  U5        g N*)
useragentsr   r   appendr   entrys     r   
_add_entryRobotFileParser._add_entryI   s;    %"""!!)%*" * LL&r   c                    Sn[        5       nU GH  nU(       d6  US:X  a  [        5       nSnO#US:X  a  U R                  U5        [        5       nSnUR                  S5      nUS:  a  USU nUR                  5       nU(       d  Mv  UR	                  SS5      n[        U5      S:X  d  M  US   R                  5       R                  5       US'   [        R                  R                  US   R                  5       5      US'   US   S:X  aD  US:X  a  U R                  U5        [        5       nUR                  R                  US   5        SnGM=  US   S:X  a6  US:w  a-  UR                  R                  [        US   S	5      5        SnGMy  GM|  US   S
:X  d  GM  US:w  d  GM  UR                  R                  [        US   S5      5        SnGM     US:X  a  U R                  U5        gg)z|Parse the input lines from a robots.txt file.

We allow that a user-agent: line is not preceded by
one or more blank lines.
r   r$      #N:z
user-agentdisallowFallowT)Entryr=   findstripsplitlenlowerr   r   unquoter9   r:   	rulelinesRuleLine)r   linesstater<   lineis         r   r   RobotFileParser.parseR   s    DA:!GEEaZOOE*!GEE		#AAvBQx::<D::c1%D4yA~q'--///1Q ,,..tAw}}?Q7l*z. %$$++DG4E!W
*z..xQ/GH ! " !W'z..xQ/FG !C D A:OOE" r   c                 ~   U R                   (       a  gU R                  (       a  g[        R                  R	                  [        R                  R                  U5      5      n[        R                  R                  SSUR                  UR                  UR                  UR                  45      n[        R                  R                  U5      nU(       d  SnU R                   H,  nUR                  U5      (       d  M  UR                  U5      s  $    U R                  (       a  U R                  R                  U5      $ g)z=using the parsed robots.txt decide if useragent can fetch urlFT /)r   r   r   r   r&   rK   
urlunparser(   paramsqueryfragmentquoter   
applies_to	allowancer   )r   	useragentr   
parsed_urlr<   s        r   	can_fetchRobotFileParser.can_fetch   s    >> \\**6<<+?+?+DE
ll%%r"Z__j..
0C0C'E Fll  %C\\E	**s++ " %%//44r   c                 |    SR                  U R                   Vs/ s H  n[        U5      S-   PM     sn5      $ s  snf )NrT   
)joinr   r   r;   s     r   __str__RobotFileParser.__str__   s0    wwt||D|eE
T)|DEEDs   9)r   r   r   r   r'   r   r(   r   N)rT   )__name__
__module____qualname____firstlineno____doc__r   r   r!   r   r,   r=   r   r_   rd   __static_attributes__ r   r   r
   r
      s6    
!(?
9'0#f.Fr   c                   *    \ rS rSrSrS rS rS rSrg)rM      zhA rule line is a single "Allow:" (allowance==True) or "Disallow:"
(allowance==False) followed by a path.c                 v    US:X  a	  U(       d  Sn[         R                  R                  U5      U l        X l        g )NrT   T)r   r   rZ   r(   r\   )r   r(   r\   s      r   r   RuleLine.__init__   s+    2:iILL&&t,	"r   c                 d    U R                   S:H  =(       d    UR                  U R                   5      $ r7   )r(   
startswith)r   filenames     r   r[   RuleLine.applies_to   s%    yyCA8#6#6tyy#AAr   c                 ^    U R                   =(       a    S=(       d    SS-   U R                  -   $ )NAllowDisallowz: r\   r(   r   s    r   rd   RuleLine.__str__   s$    *78jD@499LLr   rx   N)	rf   rg   rh   ri   rj   r   r[   rd   rk   rl   r   r   rM   rM      s    1#BMr   rM   c                   0    \ rS rSrSrS rS rS rS rSr	g)	rE      z?An entry has one or more user-agents and zero or more rulelinesc                      / U l         / U l        g )N)r9   rL   r   s    r   r   Entry.__init__   s    r   c                     / nU R                    H  nUR                  SUS/5        M     U R                   H  nUR                  [        U5      S/5        M!     SR	                  U5      $ )NzUser-agent: rb   rT   )r9   extendrL   r   rc   )r   retagentrP   s       r   rd   Entry.__str__   sY    __EJJt45 %NNDJJD	4() #wws|r   c                     UR                  S5      S   R                  5       nU R                   H"  nUS:X  a    gUR                  5       nX!;   d  M"    g   g)z2check if this entry applies to the specified agentrU   r   r8   TF)rH   rJ   r9   )r   r]   r   s      r   r[   Entry.applies_to   sQ     OOC(+113	__E|KKME! % r   c                 r    U R                    H'  nUR                  U5      (       d  M  UR                  s  $    g)zJPreconditions:
- our agent applies to this entry
- filename is URL decodedT)rL   r[   r\   )r   rs   rP   s      r   r\   Entry.allowance   s0     NNDx((~~% # r   )rL   r9   N)
rf   rg   rh   ri   rj   r   rd   r[   r\   rk   rl   r   r   rE   rE      s    Ir   rE   N)
__future__r   r   r   future.builtinsr   future.backportsr   future.backports.urllibr   _parser	   _request__all__objectr
   rM   rE   rl   r   r   <module>r      s^    B B 
 $ H 
EFf EFPMv M""F "r   