
    9i:M                        S r SSKJrJrJrJr  SSK7  SSKJr  SSK	r	SSK
r
\	R                  " S5      r\	R                  " S5      r\	R                  " S5      r\	R                  " S	5      r\	R                  " S
5      r\	R                  " S5      r\	R                  " S5      r\	R                  " S5      r\	R                  " S5      r\	R                  " S5      r\	R                  " S5      r\	R                  " S\	R.                  5      r\	R                  " S\	R.                  5      r\	R                  " S5      r\	R                  " S5      r " S S\5      r " S S\R<                  5      rg)zLA parser for HTML and XHTML.

Backported for python-future from Python 3.3.
    )absolute_importdivisionprint_functionunicode_literals)*)_markupbaseNz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]>z--\s*>z(([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*z[a-zA-Z][^	
 /> ]*zJ\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*a  
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
aF  
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
         (?:\s*,)*                   # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                   (    \ rS rSrSrSS jrS rSrg)HTMLParseErrorR   z&Exception raised for all parse errors.c                 J    U(       d   eXl         US   U l        US   U l        g )Nr      msglinenooffset)selfr   positions      \/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/future/backports/html/parser.py__init__HTMLParseError.__init__U   s$    
sqkqk    c                     U R                   nU R                  b  USU R                  -  -   nU R                  b  USU R                  S-   -  -   nU$ )Nz, at line %dz, column %dr   r   )r   results     r   __str__HTMLParseError.__str__[   sN    ;;"nt{{::F;;"mt{{Q??Fr   )r   r   r   N))NN)__name__
__module____qualname____firstlineno____doc__r   r   __static_attributes__ r   r   r   r   R   s    0"r   r   c                       \ rS rSrSrSrS S jrS rS rS r	S r
S	rS
 rS rS rS rS rS!S jrS rS rS rS rS rS rS rS rS rS rS rS rS rS rS r Sr!g	)"
HTMLParserd   a  Find tags and other markup and call handler functions.

Usage:
    p = HTMLParser()
    p.feed(data)
    ...
    p.close()

Start tags are handled by calling self.handle_starttag() or
self.handle_startendtag(); end tags by self.handle_endtag().  The
data between tags is passed from the parser to the derived class
by calling self.handle_data() with the data as argument (the data
may be split up in arbitrary chunks).  Entity references are
passed by calling self.handle_entityref() with the entity
reference as the argument.  Numeric character references are
passed to self.handle_charref() with the string containing the
reference as the argument.
)scriptstylec                 r    U(       a  [         R                  " S[        SS9  Xl        U R	                  5         g)zInitialize and reset this instance.

If strict is set to False (the default) the parser will parse invalid
markup, otherwise it will raise an error.  Note that the strict mode
is deprecated.
zThe strict mode is deprecated.   )
stacklevelN)warningswarnDeprecationWarningstrictreset)r   r/   s     r   r   HTMLParser.__init__z   s)     MM:,<

r   c                     SU l         SU l        [        U l        SU l        [
        R                  R                  U 5        g)z1Reset this instance.  Loses all unprocessed data. z???N)rawdatalasttaginteresting_normalinteresting
cdata_elemr   
ParserBaser0   r   s    r   r0   HTMLParser.reset   s3    -$$T*r   c                 N    U R                   U-   U l         U R                  S5        g)zyFeed data to the parser.

Call this as often as you want, with as little or as much text
as you want (may include '\n').
r   N)r4   goaheadr   datas     r   feedHTMLParser.feed   s     ||d*Qr   c                 &    U R                  S5        g)zHandle any buffered data.r   N)r=   r:   s    r   closeHTMLParser.close   s    Qr   c                 4    [        XR                  5       5      eN)r   getpos)r   messages     r   errorHTMLParser.error   s    Wkkm44r   Nc                     U R                   $ )z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_textr:   s    r   get_starttag_textHTMLParser.get_starttag_text   s    ###r   c                     UR                  5       U l        [        R                  " SU R                  -  [        R                  5      U l        g )Nz</\s*%s\s*>)lowerr8   recompileIr7   )r   elems     r   set_cdata_modeHTMLParser.set_cdata_mode   s/    **,::nt&FMr   c                 (    [         U l        S U l        g rF   )r6   r7   r8   r:   s    r   clear_cdata_modeHTMLParser.clear_cdata_mode   s    -r   c                    U R                   nSn[        U5      nX4:  Ga  U R                  R                  X#5      nU(       a  UR	                  5       nOU R
                  (       a  GOUnX6:  a  U R                  X#U 5        U R                  X65      nX4:X  a  GOUR                  nU" SU5      (       Ga  [        R                  X#5      (       a  U R                  U5      nOU" SU5      (       a  U R                  U5      nOU" SU5      (       a  U R                  U5      nOU" SU5      (       a  U R                  U5      nOeU" SU5      (       a5  U R                  (       a  U R!                  U5      nO4U R#                  U5      nO"US-   U:  a  U R                  S5        US-   nOGOUS:  a  U(       d  GOU R                  (       a  U R%                  S5        UR'                  S	US-   5      nUS:  a!  UR'                  SUS-   5      nUS:  a  US-   nOUS-  nU R                  X#U 5        U R                  X85      nGOU" S
U5      (       a  [(        R                  X#5      nU(       a^  UR+                  5       SS n	U R-                  U	5        UR/                  5       nU" SUS-
  5      (       d  US-
  nU R                  X85      nGM  SX#S  ;   a&  U R                  USS 5        U R                  US5      nGO?U" SU5      (       Ga!  [0        R                  X#5      nU(       a\  UR+                  S5      n	U R3                  U	5        UR/                  5       nU" SUS-
  5      (       d  US-
  nU R                  X85      nGMa  [4        R                  X#5      nU(       a]  U(       aU  UR+                  5       X#S  :X  a?  U R                  (       a  U R%                  S5        OWU::  a  UnU R                  X3S-   5      nO?US-   U:  a&  U R                  S5        U R                  X3S-   5      nOO S5       eX4:  a  GM  U(       a:  X4:  a5  U R
                  (       d$  U R                  X#U 5        U R                  X45      nX#S  U l         g )Nr   <</<!--<?<!r   zEOF in middle of constructr	   &#r*   ;&z#EOF in middle of entity or char refzinteresting.search() lied)r4   lenr7   searchstartr8   handle_data	updatepos
startswithstarttagopenmatchparse_starttagparse_endtagparse_commentparse_pir/   parse_declarationparse_html_declarationrI   findcharrefgrouphandle_charrefend	entityrefhandle_entityref
incomplete)
r   rv   r4   inrk   jri   knames
             r   r=   HTMLParser.goahead   s   ,,Le$$++G7EKKM??ud&&w|4q$Avu ++J#q!!%%g11++A.Aa(())!,A****1-Aa((a(Aa(({{ 2215 77:!eq[$$S)AAq5{{

#?@S!a%0A1u#LLa!e4q5 !AAQ$$Wq\2NN1(D!$$g1 ;;=2.D''-		A%c1Q3//Eq,Agbk)((16 NN1a0C##!3 ;;q>D))$/		A%c1Q3//Eq,A"((4u{{};;; JJ'LM Av$% $qa% 8A!eq[ $$S)qa%0A555qu ex 15Wq\*q$Ar{r   c                 p   U R                   nX!US-    S:X  d   S5       eX!US-    S:X  a  U R                  U5      $ X!US-    S:X  a  U R                  U5      $ X!US-    R                  5       S	:X  a7  UR	                  S
US-   5      nUS:X  a  gU R                  X!S-   U 5        US-   $ U R                  U5      $ )Nr*   r_   z+unexpected call to parse_html_declaration()   r]      z<![	   z	<!doctyper	   ra   r   )r4   rn   parse_marked_sectionrP   rr   handle_declparse_bogus_comment)r   rz   r4   gtposs       r   rq   !HTMLParser.parse_html_declaration  s    ,,1~% 	D )C 	D%QqS>V#%%a((qs^u$,,Q//qs^!!#{2LLac*E{WqS/07N++A..r   c                     U R                   nX1US-    S;   d   S5       eUR                  SUS-   5      nUS:X  a  gU(       a  U R                  X1S-   U 5        US-   $ )Nr*   )r_   r\   z"unexpected call to parse_comment()r	   ra   r   )r4   rr   handle_comment)r   rz   reportr4   poss        r   r   HTMLParser.parse_bogus_comment-  sq    ,,1~- 	C 1B 	C-ll3!$"9!C 01Qwr   c                     U R                   nX!US-    S:X  d   S5       e[        R                  X!S-   5      nU(       d  gUR                  5       nU R	                  X!S-   U 5        UR                  5       nU$ )Nr*   r^   zunexpected call to parse_pi()ra   )r4   piclosere   rf   	handle_pirv   )r   rz   r4   rk   r|   s        r   ro   HTMLParser.parse_pi9  sn    ,,1~%F'FF%w!,KKMwsA'IIKr   c                 t   S U l         U R                  U5      nUS:  a  U$ U R                  nX1U U l         / n[        R	                  X1S-   5      nU(       d   S5       eUR                  5       nUR                  S5      R                  5       =U l        nXb:  a  U R                  (       a  [        R	                  X65      nO[        R	                  X65      nU(       d  OUR                  SSS5      u  pnU
(       d  S nO0US S Ss=:X  a	  USS  :X  d  O  US S Ss=:X  a	  USS  :X  a  O  OUSS nU(       a  U R                  U5      nUR                  U	R                  5       U45        UR                  5       nXb:  a  M  X6U R                  5       nUS	;  a  U R                  5       u  pS
U R                   ;   aO  XR                   R!                  S
5      -   n[#        U R                   5      U R                   R%                  S
5      -
  nOU[#        U R                   5      -   nU R                  (       a  U R'                  SX6U S S < 35        U R)                  X1U 5        U$ UR+                  S5      (       a  U R-                  Xt5        U$ U R/                  Xt5        XpR0                  ;   a  U R3                  U5        U$ )Nr   r   z#unexpected call to parse_starttag()r*   r   'ra   ")r	   />
zjunk characters in start tag:    r   )rL   check_for_whole_start_tagr4   tagfindrk   rv   rt   rP   r5   r/   attrfindattrfind_tolerantunescapeappendstriprG   countrd   rfindrI   rg   endswithhandle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSrU   )r   rz   endposr4   attrsrk   r}   tagmattrnamerest	attrvaluerv   r   r   s                  r   rl   HTMLParser.parse_starttagE  su   #//2A:M,,&0 gs+;;;uIIK"[[^1133sj{{NN7.%++G7()1a(8%HI 	2A$8)BC.82A#7237%aO	 MM)4	LL(..*I67A! j$ %%'k!![[]NFt+++"6"6"<"<T"BBT112//55d;<  #d&:&:";;{{

%/47 8Wv./M<<##C/
    ,111##C(r   c                    U R                   nU R                  (       a  [        R                  X!5      nO[        R                  X!5      nU(       a  UR                  5       nX$US-    nUS:X  a  US-   $ US:X  av  UR                  SU5      (       a  US-   $ UR                  SU5      (       a  gU R                  (       a%  U R                  XS-   5        U R                  S5        XA:  a  U$ US-   $ US:X  a  gUS	;   a  gU R                  (       a"  U R                  X5        U R                  S
5        XA:  a  U$ US-   $ [        S5      e)Nr   r	   /r   r*   ra   zmalformed empty start tagr3   z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzmalformed start tagzwe should not get here!)
r4   r/   locatestarttagendrk   locatestarttagend_tolerantrv   ri   rh   rI   AssertionError)r   rz   r4   r   r|   nexts         r   r   $HTMLParser.check_for_whole_start_tag~  s(   ,,;;!''3A*00<AAQqS>Ds{1us{%%dA..q5L%%c1--;;NN1!e,JJ:;5Hq5Lrz 5 6 {{q$

01u1u677r   c                    U R                   nX!US-    S:X  d   S5       e[        R                  X!S-   5      nU(       d  gUR                  5       n[        R                  X!5      nU(       d  U R                  b  U R                  X!U 5        U$ U R                  (       a  U R                  SX!U < 35        [        R                  X!S-   5      nU(       d!  X!US-    S:X  a  US-   $ U R                  U5      $ UR                  5       R                  5       nUR                  S	UR                  5       5      nU R                  U5        US-   $ UR                  S5      R                  5       nU R                  b$  XpR                  :w  a  U R                  X!U 5        U$ U R                  UR                  5       5        U R!                  5         U$ )
Nr*   r\   zunexpected call to parse_endtagr   ra   zbad end tag: r   z</>r	   )r4   	endendtagre   rv   
endtagfindrk   r8   rg   r/   rI   tagfind_tolerantr   rt   rP   rr   handle_endtagrX   )r   rz   r4   rk   r   	namematchtagnamerT   s           r   rm   HTMLParser.parse_endtag  s   ,,1~%H'HH%  A#.		  ,*  5!12{{

%0@BC(..w!<IQqS>U*Q3J33A66oo'--/G
 LLimmo6Ew'7N{{1~##%??&&  5!124::<(r   c                 H    U R                  X5        U R                  U5        g rF   )r   r   r   r   r   s      r   r   HTMLParser.handle_startendtag  s    S(3r   c                     g rF   r#   r   s      r   r   HTMLParser.handle_starttag      r   c                     g rF   r#   )r   r   s     r   r   HTMLParser.handle_endtag  r   r   c                     g rF   r#   r   r~   s     r   ru   HTMLParser.handle_charref  r   r   c                     g rF   r#   r   s     r   rx   HTMLParser.handle_entityref  r   r   c                     g rF   r#   r>   s     r   rg   HTMLParser.handle_data  r   r   c                     g rF   r#   r>   s     r   r   HTMLParser.handle_comment  r   r   c                     g rF   r#   )r   decls     r   r   HTMLParser.handle_decl  r   r   c                     g rF   r#   r>   s     r   r   HTMLParser.handle_pi  r   r   c                 R    U R                   (       a  U R                  SU< 35        g g )Nzunknown declaration: )r/   rI   r>   s     r   unknown_declHTMLParser.unknown_decl  s    ;;JJD:; r   c                 F    SU;  a  U$ S n[         R                  " SX!5      $ )Nrc   c                    U R                  5       S   n  U S   S:X  aR  U SS  n U S   S;   a  [        U SS  R                  S5      S5      nO[        U R                  S5      5      n[        U5      $  SSKJn  X;   a  X    $ U R                  S5      (       a  SU -   $ [        S	[        U 5      5       H  nU S U U;   d  M  X S U    XS  -   s  $    SU -   $ ! [         a    S
U -   s $ f = f)Nr   #r   )xXrb      )html5rc   r*   r`   )
groupsintrstripchrfuture.backports.html.entitiesr   r   rangerd   
ValueError)scr   r   s       r   replaceEntities,HTMLParser.unescape.<locals>.replaceEntities  s    
1A#Q43;!"Aty(!"S 126.q6M  A: 8OZZ__7Nq#a&)A!u~$rU|ae33 * 7N   ax s   AC C%$C%z&&(#?[xX]?(?:[0-9a-fA-F]+;|\w{1,32};?)))rQ   sub)r   r   r   s      r   r   HTMLParser.unescape  s,    a<H	#0 vv?%* 	*r   )__starttag_textr8   r7   r5   r4   r/   )F)r   )"r   r   r   r    r!   r   r   r0   r@   rC   rI   rL   rM   rU   rX   r=   rq   r   ro   rl   r   rm   r   r   r   ru   rx   rg   r   r   r   r   r   r"   r#   r   r   r%   r%   d   s    & 1+5 O$Nc#P/*		5r(8V'T 
<
*r   r%   ) r!   
__future__r   r   r   r   future.builtinsfuture.backportsr   rQ   r,   rR   r6   ry   rw   rs   rj   r   commentcloser   r   r   r   VERBOSEr   r   r   r   	Exceptionr   r9   r%   r#   r   r   <module>r      sP  : :  ( 	  ZZ' ZZ%
JJ>?	
**@
Azz+&
**S/zz)$
**?
@ ::;<  ::,- JJ=>  JJ   ZZ   ZZ ) ZZ  JJsO	 ZZ=>
Y $t*'' t*r   