U
    
Ha,                    @   s   d dl mZmZmZ d dlmZ d dlmZm	Z	 d dl
mZ ddlmZ ddlmZ ddlmZmZ dd	lmZmZmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ eeZedkreZne	ZG dd deZdS )    )absolute_importdivisionunicode_literals)unichr)dequeOrderedDict)version_info   )spaceCharacters)entities)asciiLettersasciiUpper2Lower)digits	hexDigitsEOF)
tokenTypestagTokenTypes)replacementCharacters)HTMLInputStream)Trie)      c                       sd  e Zd ZdZd fdd	Zdd Zdd Zdd
dZdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8d9 Zd:d; Zd<d= Z d>d? Z!d@dA Z"dBdC Z#dDdE Z$dFdG Z%dHdI Z&dJdK Z'dLdM Z(dNdO Z)dPdQ Z*dRdS Z+dTdU Z,dVdW Z-dXdY Z.dZd[ Z/d\d] Z0d^d_ Z1d`da Z2dbdc Z3ddde Z4dfdg Z5dhdi Z6djdk Z7dldm Z8dndo Z9dpdq Z:drds Z;dtdu Z<dvdw Z=dxdy Z>dzd{ Z?d|d} Z@d~d ZAdd ZBdd ZCdd ZDdd ZEdd ZFdd ZGdd ZHdd ZIdd ZJdd ZKdd ZL  ZMS )HTMLTokenizera	   This class takes care of tokenizing HTML.

    * self.currentToken
      Holds the token that is currently being processed.

    * self.state
      Holds a reference to the method to be invoked... XXX

    * self.stream
      Points to HTMLInputStream object.
    Nc                    sF   t |f|| _|| _d| _g | _| j| _d| _d | _t	t
|   d S )NF)r   streamparser
escapeFlaglastFourChars	dataStatestateescapecurrentTokensuperr   __init__)selfr   r   kwargs	__class__ C/tmp/pip-unpacked-wheel-tx790h60/pip/_vendor/html5lib/_tokenizer.pyr"   (   s    zHTMLTokenizer.__init__c                 c   sP   t g | _|  rL| jjr6td | jjddV  q| jr
| j V  q6q
dS )z This is where the magic happens.

        We do our usually processing through the states and when we have a token
        to return we yield the token which pauses processing until the next token
        is requested.
        
ParseErrorr   typedataN)r   
tokenQueuer   r   errorsr   poppopleftr#   r'   r'   r(   __iter__7   s    
zHTMLTokenizer.__iter__c           	   %   C   s  t }d}|rt}d}g }| j }||krH|tk	rH|| | j }q"td||}|tkrt| }| j	t
d dd|id nbd|  krd	ksn |d
krd}| j	t
d dd|id n d|  krdksn d|  krdksn d|  krdksn d|  kr,dksn |tddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d
g#kr| j	t
d dd|id zt|}W n> tk
r   |d6 }td|d? B td7|d8@ B  }Y nX |d9kr| j	t
d d:d; | j| |S )<zThis function returns either U+FFFD or the character based on the
        decimal or hexadecimal representation. It also discards ";" if present.
        If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
        
       r)   z$illegal-codepoint-for-numeric-entity	charAsIntr+   r,   datavarsi   i  i    �r	                  i  i     i  i  i i i i i i i i i i i i i i i i i	 i	 i
 i
 i i i i i i i i i i i i   i   i  ;z numeric-entity-without-semicolonr*   )r   r   r   charr   appendintjoinr   r-   r   	frozensetchr
ValueErrorunget)	r#   isHexallowedradix	charStackcr6   rA   vr'   r'   r(   consumeNumberEntityG   s    

 
 

 

                       &
z!HTMLTokenizer.consumeNumberEntityFc           	      C   s  d}| j  g}|d tksB|d tddfksB|d k	rV||d krV| j |d  n|d dkrd}|| j   |d dkrd}|| j   |r|d tks|s|d tkr| j |d  | |}n4| j	t
d	 d
d | j |  dd| }nh|d tk	rDtd|s0qD|| j   qz$td|d d }t|}W n tk
r   d }Y nX |d k	r@|d dkr| j	t
d	 dd |d dkr|r|| tks|| tks|| dkr| j |  dd| }n.t| }| j |  |d||d  7 }n4| j	t
d	 dd | j |  dd| }|r| jd d d  |7  < n*|tkrd}nd}| j	t
| |d d S )N&r   <#F)xXTr)   zexpected-numeric-entityr*   r5   r@   znamed-entity-without-semicolon=zexpected-named-entityr,   r	   SpaceCharacters
Characters)r   rA   r
   r   rH   rB   r   r   rO   r-   r   r/   rD   entitiesTriehas_keys_with_prefixlongest_prefixlenKeyErrorr   r   r    )	r#   allowedCharfromAttributeoutputrL   hex
entityNameentityLength	tokenTyper'   r'   r(   consumeEntity   s~    







zHTMLTokenizer.consumeEntityc                 C   s   | j |dd dS )zIThis method replaces the need for "entityInAttributeValueState".
        T)r^   r_   N)re   )r#   r^   r'   r'   r(   processEntityInAttribute   s    z&HTMLTokenizer.processEntityInAttributec                 C   s   | j }|d tkr|d t|d< |d td krp|d }t|}t|t|krh||ddd  ||d< |d td kr|d r| j	td d	d
 |d r| j	td dd
 | j	| | j
| _dS )zThis method is a generic handler for emitting the tags. It also sets
        the state to "data" because that's what's needed after a token has been
        emitted.
        r+   nameStartTagr,   NrS   EndTagr)   zattributes-in-end-tagr*   selfClosingzself-closing-flag-on-end-tag)r    r   	translater   r   attributeMapr\   updater-   rB   r   r   )r#   tokenrawr,   r'   r'   r(   emitCurrentToken   s(    zHTMLTokenizer.emitCurrentTokenc                 C   s   | j  }|dkr| j| _n|dkr.| j| _n|dkrd| jtd dd | jtd dd n`|tkrpdS |t	kr| jtd	 || j 
t	d
 d n&| j 
d}| jtd || d d
S )NrP   rQ    r)   invalid-codepointr*   rX   FrW   TrP   rQ   rq   )r   rA   entityDataStater   tagOpenStater-   rB   r   r   r
   
charsUntilr#   r,   charsr'   r'   r(   r      s.    




zHTMLTokenizer.dataStatec                 C   s   |    | j| _dS NT)re   r   r   r1   r'   r'   r(   rt     s    zHTMLTokenizer.entityDataStatec                 C   s   | j  }|dkr| j| _n|dkr.| j| _n|tkr:dS |dkrp| jtd dd | jtd d	d nT|t	kr| jtd
 || j 
t	d d n&| j 
d}| jtd || d dS )NrP   rQ   Frq   r)   rr   r*   rX   r9   rW   Trs   )r   rA   characterReferenceInRcdatar   rcdataLessThanSignStater   r-   rB   r   r
   rv   rw   r'   r'   r(   rcdataState"  s.    




zHTMLTokenizer.rcdataStatec                 C   s   |    | j| _dS ry   )re   r|   r   r1   r'   r'   r(   rz   ?  s    z(HTMLTokenizer.characterReferenceInRcdatac                 C   s   | j  }|dkr| j| _nh|dkrR| jtd dd | jtd dd n2|tkr^dS | j d	}| jtd || d d
S NrQ   rq   r)   rr   r*   rX   r9   F)rQ   rq   T)	r   rA   rawtextLessThanSignStater   r-   rB   r   r   rv   rw   r'   r'   r(   rawtextStateD  s"    


zHTMLTokenizer.rawtextStatec                 C   s   | j  }|dkr| j| _nh|dkrR| jtd dd | jtd dd n2|tkr^dS | j d	}| jtd || d d
S r}   )	r   rA   scriptDataLessThanSignStater   r-   rB   r   r   rv   rw   r'   r'   r(   scriptDataStateV  s"    


zHTMLTokenizer.scriptDataStatec                 C   sr   | j  }|tkrdS |dkrL| jtd dd | jtd dd n"| jtd || j d d dS )	NFrq   r)   rr   r*   rX   r9   T)r   rA   r   r-   rB   r   rv   r#   r,   r'   r'   r(   plaintextStateh  s    

zHTMLTokenizer.plaintextStatec                 C   s  | j  }|dkr| j| _n|dkr.| j| _n|tkrVtd |g ddd| _| j| _n|dkr| j	
td dd	 | j	
td
 dd	 | j| _nt|dkr| j	
td dd	 | j | | j| _n@| j	
td dd	 | j	
td
 dd	 | j | | j| _dS )N!/rh   F)r+   rg   r,   rj   selfClosingAcknowledged>r)   z'expected-tag-name-but-got-right-bracketr*   rX   z<>?z'expected-tag-name-but-got-question-markzexpected-tag-namerQ   T)r   rA   markupDeclarationOpenStater   closeTagOpenStater   r   r    tagNameStater-   rB   r   rH   bogusCommentStater   r'   r'   r(   ru   w  s@    


 


zHTMLTokenizer.tagOpenStatec                 C   s   | j  }|tkr0td |g dd| _| j| _n|dkrX| jtd dd | j	| _nn|t
kr| jtd dd | jtd	 d
d | j	| _n0| jtd dd|id | j | | j| _dS )Nri   Fr+   rg   r,   rj   r   r)   z*expected-closing-tag-but-got-right-bracketr*   z expected-closing-tag-but-got-eofrX   </z!expected-closing-tag-but-got-charr,   r7   T)r   rA   r   r   r    r   r   r-   rB   r   r   rH   r   r   r'   r'   r(   r     s2    
 


zHTMLTokenizer.closeTagOpenStatec                 C   s   | j  }|tkr| j| _n|dkr.|   n~|tkrV| jt	d dd | j
| _nV|dkrh| j| _nD|dkr| jt	d dd | jd  d	7  < n| jd  |7  < d
S )Nr   r)   zeof-in-tag-namer*   r   rq   rr   rg   r9   T)r   rA   r
   beforeAttributeNameStater   rp   r   r-   rB   r   r   selfClosingStartTagStater    r   r'   r'   r(   r     s&    




zHTMLTokenizer.tagNameStatec                 C   sP   | j  }|dkr"d| _| j| _n*| jtd dd | j | | j	| _dS Nr   r5   rX   rQ   r*   T)
r   rA   temporaryBufferrcdataEndTagOpenStater   r-   rB   r   rH   r|   r   r'   r'   r(   r{     s    

z%HTMLTokenizer.rcdataLessThanSignStatec                 C   sX   | j  }|tkr*|  j|7  _| j| _n*| jtd dd | j 	| | j
| _dS NrX   r   r*   T)r   rA   r   r   rcdataEndTagNameStater   r-   rB   r   rH   r|   r   r'   r'   r(   r     s    

z#HTMLTokenizer.rcdataEndTagOpenStatec                 C   s   | j o| j d  | j k}| j }|tkrT|rTtd | jg dd| _ | j| _n|dkr|rtd | jg dd| _ | j	| _n||dkr|rtd | jg dd| _ | 
  | j| _nH|tkr|  j|7  _n0| jtd d| j d	 | j| | j| _d
S Nrg   ri   Fr   r   r   rX   r   r*   T)r    lowerr   r   rA   r
   r   r   r   r   rp   r   r   r-   rB   rH   r|   r#   appropriater,   r'   r'   r(   r     s@    
 
 
 
z#HTMLTokenizer.rcdataEndTagNameStatec                 C   sP   | j  }|dkr"d| _| j| _n*| jtd dd | j | | j	| _dS r   )
r   rA   r   rawtextEndTagOpenStater   r-   rB   r   rH   r   r   r'   r'   r(   r~     s    

z&HTMLTokenizer.rawtextLessThanSignStatec                 C   sX   | j  }|tkr*|  j|7  _| j| _n*| jtd dd | j 	| | j
| _dS r   )r   rA   r   r   rawtextEndTagNameStater   r-   rB   r   rH   r   r   r'   r'   r(   r     s    

z$HTMLTokenizer.rawtextEndTagOpenStatec                 C   s   | j o| j d  | j k}| j }|tkrT|rTtd | jg dd| _ | j| _n|dkr|rtd | jg dd| _ | j	| _n||dkr|rtd | jg dd| _ | 
  | j| _nH|tkr|  j|7  _n0| jtd d| j d	 | j| | j| _d
S r   )r    r   r   r   rA   r
   r   r   r   r   rp   r   r   r-   rB   rH   r   r   r'   r'   r(   r     s@    
 
 
 
z$HTMLTokenizer.rawtextEndTagNameStatec                 C   sx   | j  }|dkr"d| _| j| _nR|dkrJ| jtd dd | j| _n*| jtd dd | j 	| | j
| _dS )	Nr   r5   r   rX   z<!r*   rQ   T)r   rA   r   scriptDataEndTagOpenStater   r-   rB   r   scriptDataEscapeStartStaterH   r   r   r'   r'   r(   r   ,  s    


z)HTMLTokenizer.scriptDataLessThanSignStatec                 C   sX   | j  }|tkr*|  j|7  _| j| _n*| jtd dd | j 	| | j
| _dS r   )r   rA   r   r   scriptDataEndTagNameStater   r-   rB   r   rH   r   r   r'   r'   r(   r   :  s    

z'HTMLTokenizer.scriptDataEndTagOpenStatec                 C   s   | j o| j d  | j k}| j }|tkrT|rTtd | jg dd| _ | j| _n|dkr|rtd | jg dd| _ | j	| _n||dkr|rtd | jg dd| _ | 
  | j| _nH|tkr|  j|7  _n0| jtd d| j d	 | j| | j| _d
S r   )r    r   r   r   rA   r
   r   r   r   r   rp   r   r   r-   rB   rH   r   r   r'   r'   r(   r   E  s@    
 
 
 
z'HTMLTokenizer.scriptDataEndTagNameStatec                 C   sJ   | j  }|dkr2| jtd dd | j| _n| j | | j| _dS N-rX   r*   T)	r   rA   r-   rB   r   scriptDataEscapeStartDashStater   rH   r   r   r'   r'   r(   r   a  s    

z(HTMLTokenizer.scriptDataEscapeStartStatec                 C   sJ   | j  }|dkr2| jtd dd | j| _n| j | | j| _dS r   )	r   rA   r-   rB   r   scriptDataEscapedDashDashStater   rH   r   r   r'   r'   r(   r   k  s    

z,HTMLTokenizer.scriptDataEscapeStartDashStatec                 C   s   | j  }|dkr2| jtd dd | j| _n|dkrD| j| _nn|dkrz| jtd dd | jtd dd n8|tkr| j	| _n&| j 
d	}| jtd || d d
S )Nr   rX   r*   rQ   rq   r)   rr   r9   )rQ   r   rq   T)r   rA   r-   rB   r   scriptDataEscapedDashStater   "scriptDataEscapedLessThanSignStater   r   rv   rw   r'   r'   r(   scriptDataEscapedStateu  s(    




z$HTMLTokenizer.scriptDataEscapedStatec                 C   s   | j  }|dkr2| jtd dd | j| _n|dkrD| j| _nn|dkr| jtd dd | jtd dd | j| _n0|t	kr| j
| _n| jtd |d | j| _d	S )
Nr   rX   r*   rQ   rq   r)   rr   r9   T)r   rA   r-   rB   r   r   r   r   r   r   r   r   r'   r'   r(   r     s&    




z(HTMLTokenizer.scriptDataEscapedDashStatec                 C   s   | j  }|dkr*| jtd dd n|dkr<| j| _n|dkrd| jtd dd | j| _nn|dkr| jtd dd | jtd d	d | j| _n0|t	kr| j
| _n| jtd |d | j| _d
S )Nr   rX   r*   rQ   r   rq   r)   rr   r9   T)r   rA   r-   rB   r   r   r   r   r   r   r   r   r'   r'   r(   r     s*    




z,HTMLTokenizer.scriptDataEscapedDashDashStatec                 C   s   | j  }|dkr"d| _| j| _n\|tkrT| jtd d| d || _| j	| _n*| jtd dd | j 
| | j| _dS r   )r   rA   r    scriptDataEscapedEndTagOpenStater   r   r-   rB   r    scriptDataDoubleEscapeStartStaterH   r   r   r'   r'   r(   r     s    


z0HTMLTokenizer.scriptDataEscapedLessThanSignStatec                 C   sP   | j  }|tkr"|| _| j| _n*| jtd dd | j 	| | j
| _dS r   )r   rA   r   r    scriptDataEscapedEndTagNameStater   r-   rB   r   rH   r   r   r'   r'   r(   r     s    

z.HTMLTokenizer.scriptDataEscapedEndTagOpenStatec                 C   s   | j o| j d  | j k}| j }|tkrT|rTtd | jg dd| _ | j| _n|dkr|rtd | jg dd| _ | j	| _n||dkr|rtd | jg dd| _ | 
  | j| _nH|tkr|  j|7  _n0| jtd d| j d	 | j| | j| _d
S r   )r    r   r   r   rA   r
   r   r   r   r   rp   r   r   r-   rB   rH   r   r   r'   r'   r(   r     s@    
 
 
 
z.HTMLTokenizer.scriptDataEscapedEndTagNameStatec                 C   s   | j  }|ttdB krR| jtd |d | j dkrH| j	| _
q| j| _
nB|tkr| jtd |d |  j|7  _n| j | | j| _
dS N)r   r   rX   r*   scriptT)r   rA   r
   rE   r-   rB   r   r   r   scriptDataDoubleEscapedStater   r   r   rH   r   r'   r'   r(   r     s    


z.HTMLTokenizer.scriptDataDoubleEscapeStartStatec                 C   s   | j  }|dkr2| jtd dd | j| _n|dkrZ| jtd dd | j| _nt|dkr| jtd dd | jtd dd n>|tkr| jtd d	d | j	| _n| jtd |d d
S Nr   rX   r*   rQ   rq   r)   rr   r9   eof-in-script-in-scriptT)
r   rA   r-   rB   r    scriptDataDoubleEscapedDashStater   (scriptDataDoubleEscapedLessThanSignStater   r   r   r'   r'   r(   r     s*    




z*HTMLTokenizer.scriptDataDoubleEscapedStatec                 C   s   | j  }|dkr2| jtd dd | j| _n|dkrZ| jtd dd | j| _n|dkr| jtd dd | jtd dd | j| _nF|t	kr| jtd d	d | j
| _n| jtd |d | j| _d
S r   )r   rA   r-   rB   r   $scriptDataDoubleEscapedDashDashStater   r   r   r   r   r   r'   r'   r(   r     s.    




z.HTMLTokenizer.scriptDataDoubleEscapedDashStatec                 C   s  | j  }|dkr*| jtd dd n|dkrR| jtd dd | j| _n|dkrz| jtd dd | j| _n|dkr| jtd dd | jtd d	d | j| _nF|t	kr| jtd d
d | j
| _n| jtd |d | j| _dS )Nr   rX   r*   rQ   r   rq   r)   rr   r9   r   T)r   rA   r-   rB   r   r   r   r   r   r   r   r   r'   r'   r(   r   %  s2    




z2HTMLTokenizer.scriptDataDoubleEscapedDashDashStatec                 C   sP   | j  }|dkr8| jtd dd d| _| j| _n| j | | j	| _dS )Nr   rX   r*   r5   T)
r   rA   r-   rB   r   r   scriptDataDoubleEscapeEndStater   rH   r   r   r'   r'   r(   r   >  s    

z6HTMLTokenizer.scriptDataDoubleEscapedLessThanSignStatec                 C   s   | j  }|ttdB krR| jtd |d | j dkrH| j	| _
q| j| _
nB|tkr| jtd |d |  j|7  _n| j | | j| _
dS r   )r   rA   r
   rE   r-   rB   r   r   r   r   r   r   r   rH   r   r'   r'   r(   r   I  s    


z,HTMLTokenizer.scriptDataDoubleEscapeEndStatec                 C   s0  | j  }|tkr$| j td n|tkrJ| jd |dg | j| _n|dkr\| 	  n|dkrn| j
| _n|dkr| jtd dd	 | jd |dg | j| _n|d
kr| jtd dd	 | jd ddg | j| _nF|tkr| jtd dd	 | j| _n| jd |dg | j| _dS )NTr,   r5   r   r   )'"rV   rQ   r)   #invalid-character-in-attribute-namer*   rq   rr   r9   z#expected-attribute-name-but-got-eof)r   rA   r
   rv   r   r    rB   attributeNameStater   rp   r   r-   r   r   r   r   r'   r'   r(   r   Y  s<    







z&HTMLTokenizer.beforeAttributeNameStatec                 C   s  | j  }d}d}|dkr&| j| _n.|tkr\| jd d d  || j td 7  < d}n|dkrjd}n|tkr|| j| _n|dkr| j	| _n|d	kr| j
td
 dd | jd d d  d7  < d}n|dkr| j
td
 dd | jd d d  |7  < d}nH|tkr6| j
td
 dd | j| _n| jd d d  |7  < d}|r| jd d d t| jd d d< | jd d d D ]>\}}| jd d d |kr| j
td
 dd  qҐq|r|   dS )NTFrV   r,   rS   r   r   r   rq   r)   rr   r*   r9   r   r   rQ   r   zeof-in-attribute-namezduplicate-attribute)r   rA   beforeAttributeValueStater   r   r    rv   r
   afterAttributeNameStater   r-   rB   r   r   r   rk   r   rp   )r#   r,   leavingThisState	emitTokenrg   _r'   r'   r(   r   w  s^    






z HTMLTokenizer.attributeNameStatec                 C   sD  | j  }|tkr$| j td n|dkr8| j| _n|dkrJ|   n|tkrp| jd 	|dg | j
| _n|dkr| j| _n|dkr| j	td d	d
 | jd 	ddg | j
| _n|dkr| j	td dd
 | jd 	|dg | j
| _nF|tkr$| j	td dd
 | j| _n| jd 	|dg | j
| _dS )NTrV   r   r,   r5   r   rq   r)   rr   r*   r9   r   z&invalid-character-after-attribute-namezexpected-end-of-tag-but-got-eof)r   rA   r
   rv   r   r   rp   r   r    rB   r   r   r-   r   r   r   r   r'   r'   r(   r     s@    







z%HTMLTokenizer.afterAttributeNameStatec                 C   sh  | j  }|tkr$| j td n@|dkr8| j| _n,|dkrX| j| _| j | n|dkrj| j| _n|dkr| j	
td dd |   n|d	kr| j	
td d
d | jd d d  d7  < | j| _n|dkr| j	
td dd | jd d d  |7  < | j| _nL|tkrB| j	
td dd | j| _n"| jd d d  |7  < | j| _dS )NTr   rP   r   r   r)   z.expected-attribute-value-but-got-right-bracketr*   rq   rr   r,   rS   r	   r9   )rV   rQ   `z"equals-in-unquoted-attribute-valuez$expected-attribute-value-but-got-eof)r   rA   r
   rv   attributeValueDoubleQuotedStater   attributeValueUnQuotedStaterH   attributeValueSingleQuotedStater-   rB   r   rp   r    r   r   r   r'   r'   r(   r     sF    







z'HTMLTokenizer.beforeAttributeValueStatec                 C   s   | j  }|dkr| j| _n|dkr0| d n|dkrj| jtd dd | jd d d	  d
7  < nN|t	kr| jtd dd | j
| _n&| jd d d	  || j d 7  < dS )Nr   rP   rq   r)   rr   r*   r,   rS   r	   r9   z#eof-in-attribute-value-double-quote)r   rP   rq   Tr   rA   afterAttributeValueStater   rf   r-   rB   r   r    r   r   rv   r   r'   r'   r(   r     s&    



z-HTMLTokenizer.attributeValueDoubleQuotedStatec                 C   s   | j  }|dkr| j| _n|dkr0| d n|dkrj| jtd dd | jd d d	  d
7  < nN|t	kr| jtd dd | j
| _n&| jd d d	  || j d 7  < dS )Nr   rP   rq   r)   rr   r*   r,   rS   r	   r9   z#eof-in-attribute-value-single-quote)r   rP   rq   Tr   r   r'   r'   r(   r     s&    



z-HTMLTokenizer.attributeValueSingleQuotedStatec                 C   s  | j  }|tkr| j| _n|dkr0| d n|dkrB|   n|dkr|| jt	d dd | j
d d d	  |7  < n|d
kr| jt	d dd | j
d d d	  d7  < nV|tkr| jt	d dd | j| _n.| j
d d d	  || j tdtB  7  < dS )NrP   r   )r   r   rV   rQ   r   r)   z0unexpected-character-in-unquoted-attribute-valuer*   r,   rS   r	   rq   rr   r9   z eof-in-attribute-value-no-quotes)rP   r   r   r   rV   rQ   r   rq   T)r   rA   r
   r   r   rf   rp   r-   rB   r   r    r   r   rv   rE   r   r'   r'   r(   r     s4    





z)HTMLTokenizer.attributeValueUnQuotedStatec                 C   s   | j  }|tkr| j| _n|dkr.|   np|dkr@| j| _n^|tkrt| j	t
d dd | j | | j| _n*| j	t
d dd | j | | j| _dS )Nr   r   r)   z$unexpected-EOF-after-attribute-valuer*   z*unexpected-character-after-attribute-valueT)r   rA   r
   r   r   rp   r   r   r-   rB   r   rH   r   r   r'   r'   r(   r   .  s&    




z&HTMLTokenizer.afterAttributeValueStatec                 C   s   | j  }|dkr&d| jd< |   n^|tkrZ| jtd dd | j | | j	| _
n*| jtd dd | j | | j| _
dS )Nr   Trj   r)   z#unexpected-EOF-after-solidus-in-tagr*   z)unexpected-character-after-solidus-in-tag)r   rA   r    rp   r   r-   rB   r   rH   r   r   r   r   r'   r'   r(   r   B  s     



z&HTMLTokenizer.selfClosingStartTagStatec                 C   sD   | j d}|dd}| jtd |d | j   | j| _dS )Nr   rq   r9   Commentr*   T)	r   rv   replacer-   rB   r   rA   r   r   r   r'   r'   r(   r   T  s    
zHTMLTokenizer.bogusCommentStatec                 C   s  | j  g}|d dkrR|| j   |d dkrPtd dd| _| j| _dS n|d dkrd}dD ](}|| j   |d |krfd	} qqf|rtd
 dd d dd| _| j| _dS n|d dkrD| jd k	rD| jj	j
rD| jj	j
d j| jj	jkrDd}dD ].}|| j   |d |krd	} q2q|rD| j| _dS | jtd dd |rt| j |  qZ| j| _dS )NrS   r   r   r5   r*   T)dD))oOrM   CtTyYpPeEFDoctype)r+   rg   publicIdsystemIdcorrect[)r   r   Ar   r   r   r)   zexpected-dashes-or-doctype)r   rA   rB   r   r    commentStartStater   doctypeStater   treeopenElements	namespacedefaultNamespacecdataSectionStater-   rH   r/   r   )r#   rL   matchedexpectedr'   r'   r(   r   c  s\     
z(HTMLTokenizer.markupDeclarationOpenStatec                 C   s   | j  }|dkr| j| _n|dkrN| jtd dd | jd  d7  < n|dkr| jtd d	d | j| j | j| _nP|t	kr| jtd d
d | j| j | j| _n| jd  |7  < | j
| _dS )Nr   rq   r)   rr   r*   r,   r9   r   incorrect-commenteof-in-commentT)r   rA   commentStartDashStater   r-   rB   r   r    r   r   commentStater   r'   r'   r(   r     s.    



zHTMLTokenizer.commentStartStatec                 C   s   | j  }|dkr| j| _n|dkrN| jtd dd | jd  d7  < n|dkr| jtd d	d | j| j | j| _nT|t	kr| jtd d
d | j| j | j| _n| jd  d| 7  < | j
| _dS )Nr   rq   r)   rr   r*   r,      -�r   r   r   T)r   rA   commentEndStater   r-   rB   r   r    r   r   r   r   r'   r'   r(   r     s.    



z#HTMLTokenizer.commentStartDashStatec                 C   s   | j  }|dkr| j| _n|dkrN| jtd dd | jd  d7  < nT|tkr| jtd dd | j| j | j	| _n| jd  || j 
d	 7  < d
S )Nr   rq   r)   rr   r*   r,   r9   r   )r   rq   T)r   rA   commentEndDashStater   r-   rB   r   r    r   r   rv   r   r'   r'   r(   r     s$    



zHTMLTokenizer.commentStatec                 C   s   | j  }|dkr| j| _n|dkrV| jtd dd | jd  d7  < | j| _nT|t	kr| jtd dd | j| j | j
| _n| jd  d| 7  < | j| _d	S )
Nr   rq   r)   rr   r*   r,   r   zeof-in-comment-end-dashT)r   rA   r   r   r-   rB   r   r    r   r   r   r   r'   r'   r(   r     s$    



z!HTMLTokenizer.commentEndDashStatec                 C   s,  | j  }|dkr*| j| j | j| _n|dkrd| jtd dd | jd  d7  < | j| _n|dkr| jtd d	d | j	| _n|d
kr| jtd dd | jd  |7  < nj|t
kr| jtd dd | j| j | j| _n4| jtd dd | jd  d| 7  < | j| _dS )Nr   rq   r)   rr   r*   r,   u   --�r   z,unexpected-bang-after-double-dash-in-commentr   z,unexpected-dash-after-double-dash-in-commentzeof-in-comment-double-dashzunexpected-char-in-commentz--T)r   rA   r-   rB   r    r   r   r   r   commentEndBangStater   r   r'   r'   r(   r     s@    




zHTMLTokenizer.commentEndStatec                 C   s   | j  }|dkr*| j| j | j| _n|dkrN| jd  d7  < | j| _n|dkr| jtd dd | jd  d	7  < | j	| _nT|t
kr| jtd d
d | j| j | j| _n| jd  d| 7  < | j	| _dS )Nr   r   r,   z--!rq   r)   rr   r*   u   --!�zeof-in-comment-end-bang-stateT)r   rA   r-   rB   r    r   r   r   r   r   r   r   r'   r'   r(   r     s,    




z!HTMLTokenizer.commentEndBangStatec                 C   s   | j  }|tkr| j| _nj|tkr\| jtd dd d| j	d< | j| j	 | j
| _n*| jtd dd | j | | j| _dS )Nr)   !expected-doctype-name-but-got-eofr*   Fr   zneed-space-after-doctypeT)r   rA   r
   beforeDoctypeNameStater   r   r-   rB   r   r    r   rH   r   r'   r'   r(   r     s     



zHTMLTokenizer.doctypeStatec                 C   s   | j  }|tkrn|dkrT| jtd dd d| jd< | j| j | j| _n|dkr| jtd dd d	| jd
< | j	| _nR|t
kr| jtd dd d| jd< | j| j | j| _n|| jd
< | j	| _dS )Nr   r)   z+expected-doctype-name-but-got-right-bracketr*   Fr   rq   rr   r9   rg   r   T)r   rA   r
   r-   rB   r   r    r   r   doctypeNameStater   r   r'   r'   r(   r   *  s4    







z$HTMLTokenizer.beforeDoctypeNameStatec                 C   s  | j  }|tkr2| jd t| jd< | j| _n|dkrh| jd t| jd< | j	| j | j
| _n|dkr| j	td dd | jd  d7  < | j| _nh|tkr| j	td dd d	| jd
< | jd t| jd< | j	| j | j
| _n| jd  |7  < dS )Nrg   r   rq   r)   rr   r*   r9   zeof-in-doctype-nameFr   T)r   rA   r
   r    rk   r   afterDoctypeNameStater   r-   rB   r   r   r   r   r   r'   r'   r(   r   D  s0    





zHTMLTokenizer.doctypeNameStatec                 C   sH  | j  }|tkrn.|dkr8| j| j | j| _n|tkrd| jd< | j 	| | jt
d dd | j| j | j| _n|dkrd}d	D ]}| j  }||krd} qq|r| j| _dS nD|d
kr
d}dD ]}| j  }||krd} qq|r
| j| _dS | j 	| | jt
d dd|id d| jd< | j| _dS )Nr   Fr   r)   eof-in-doctyper*   r   T))uU)bB)lL)iIr   sS)r   r   r   r   )mMz*expected-space-or-right-bracket-in-doctyper,   r7   )r   rA   r
   r-   rB   r    r   r   r   rH   r   afterDoctypePublicKeywordStateafterDoctypeSystemKeywordStatebogusDoctypeState)r#   r,   r   r   r'   r'   r(   r   ]  sT    






z#HTMLTokenizer.afterDoctypeNameStatec                 C   s   | j  }|tkr| j| _n|dkrP| jtd dd | j | | j| _nT|t	kr| jtd dd d| j
d< | j| j
 | j| _n| j | | j| _dS 	N)r   r   r)   unexpected-char-in-doctyper*   r   Fr   T)r   rA   r
   "beforeDoctypePublicIdentifierStater   r-   rB   r   rH   r   r    r   r   r'   r'   r(   r     s&    




z,HTMLTokenizer.afterDoctypePublicKeywordStatec                 C   s   | j  }|tkrn|dkr0d| jd< | j| _n|dkrLd| jd< | j| _n|dkr| jt	d dd d	| jd
< | j| j | j
| _nh|tkr| jt	d dd d	| jd
< | j| j | j
| _n(| jt	d dd d	| jd
< | j| _dS )Nr   r5   r   r   r   r)   unexpected-end-of-doctyper*   Fr   r   r  T)r   rA   r
   r    (doctypePublicIdentifierDoubleQuotedStater   (doctypePublicIdentifierSingleQuotedStater-   rB   r   r   r   r  r   r'   r'   r(   r    s:    









z0HTMLTokenizer.beforeDoctypePublicIdentifierStatec                 C   s   | j  }|dkr| j| _n|dkrN| jtd dd | jd  d7  < n|dkr| jtd d	d d
| jd< | j| j | j| _nR|t	kr| jtd dd d
| jd< | j| j | j| _n| jd  |7  < dS )Nr   rq   r)   rr   r*   r   r9   r   r  Fr   r   T
r   rA   !afterDoctypePublicIdentifierStater   r-   rB   r   r    r   r   r   r'   r'   r(   r    s0    





z6HTMLTokenizer.doctypePublicIdentifierDoubleQuotedStatec                 C   s   | j  }|dkr| j| _n|dkrN| jtd dd | jd  d7  < n|dkr| jtd d	d d
| jd< | j| j | j| _nR|t	kr| jtd dd d
| jd< | j| j | j| _n| jd  |7  < dS )Nr   rq   r)   rr   r*   r   r9   r   r  Fr   r   Tr  r   r'   r'   r(   r    s0    





z6HTMLTokenizer.doctypePublicIdentifierSingleQuotedStatec                 C   s  | j  }|tkr| j| _n|dkr<| j| j | j| _n|dkrn| jt	d dd d| jd< | j
| _n|dkr| jt	d dd d| jd< | j| _nh|tkr| jt	d d	d d
| jd< | j| j | j| _n(| jt	d dd d
| jd< | j| _dS )Nr   r   r)   r  r*   r5   r   r   r   Fr   T)r   rA   r
   -betweenDoctypePublicAndSystemIdentifiersStater   r-   rB   r    r   r   (doctypeSystemIdentifierDoubleQuotedState(doctypeSystemIdentifierSingleQuotedStater   r  r   r'   r'   r(   r	    s>    









z/HTMLTokenizer.afterDoctypePublicIdentifierStatec                 C   s   | j  }|tkrn|dkr4| j| j | j| _n|dkrPd| jd< | j| _n|dkrld| jd< | j	| _nh|t
kr| jtd dd d	| jd
< | j| j | j| _n(| jtd dd d	| jd
< | j| _dS )Nr   r   r5   r   r   r)   r   r*   Fr   r  T)r   rA   r
   r-   rB   r    r   r   r  r  r   r   r  r   r'   r'   r(   r
    s2    








z;HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersStatec                 C   s   | j  }|tkr| j| _n|dkrP| jtd dd | j | | j| _nT|t	kr| jtd dd d| j
d< | j| j
 | j| _n| j | | j| _dS r  )r   rA   r
   "beforeDoctypeSystemIdentifierStater   r-   rB   r   rH   r   r    r   r   r'   r'   r(   r   )  s&    




z,HTMLTokenizer.afterDoctypeSystemKeywordStatec                 C   s   | j  }|tkrn|dkr0d| jd< | j| _n|dkrLd| jd< | j| _n|dkr| jt	d dd d	| jd
< | j| j | j
| _nh|tkr| jt	d dd d	| jd
< | j| j | j
| _n(| jt	d dd d	| jd
< | j| _dS )Nr   r5   r   r   r   r)   r  r*   Fr   r   T)r   rA   r
   r    r  r   r  r-   rB   r   r   r   r  r   r'   r'   r(   r  =  s:    









z0HTMLTokenizer.beforeDoctypeSystemIdentifierStatec                 C   s   | j  }|dkr| j| _n|dkrN| jtd dd | jd  d7  < n|dkr| jtd d	d d
| jd< | j| j | j| _nR|t	kr| jtd dd d
| jd< | j| j | j| _n| jd  |7  < dS )Nr   rq   r)   rr   r*   r   r9   r   r  Fr   r   T
r   rA   !afterDoctypeSystemIdentifierStater   r-   rB   r   r    r   r   r   r'   r'   r(   r  Z  s0    





z6HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedStatec                 C   s   | j  }|dkr| j| _n|dkrN| jtd dd | jd  d7  < n|dkr| jtd d	d d
| jd< | j| j | j| _nR|t	kr| jtd dd d
| jd< | j| j | j| _n| jd  |7  < dS )Nr   rq   r)   rr   r*   r   r9   r   r  Fr   r   Tr  r   r'   r'   r(   r  r  s0    





z6HTMLTokenizer.doctypeSystemIdentifierSingleQuotedStatec                 C   s   | j  }|tkrn~|dkr4| j| j | j| _n^|tkrt| jt	d dd d| jd< | j| j | j| _n| jt	d dd | j
| _dS )	Nr   r)   r   r*   Fr   r  T)r   rA   r
   r-   rB   r    r   r   r   r   r  r   r'   r'   r(   r    s$    



z/HTMLTokenizer.afterDoctypeSystemIdentifierStatec                 C   sZ   | j  }|dkr*| j| j | j| _n,|tkrV| j | | j| j | j| _n dS )Nr   T)	r   rA   r-   rB   r    r   r   r   rH   r   r'   r'   r(   r    s    


zHTMLTokenizer.bogusDoctypeStatec                 C   s   g }| | jd | | jd | j }|tkr>qq|dksJt|d dd  dkrv|d d d |d< qq| | qd|}|d}|dkrt|D ]}| j	 t
d	 d
d q|dd}|r| j	 t
d |d | j| _dS )N]r   rS   z]]r5   rq   r   r)   rr   r*   r9   rX   T)rB   r   rv   rA   r   AssertionErrorrD   countranger-   r   r   r   r   )r#   r,   rA   	nullCountr   r'   r'   r(   r     s2    



zHTMLTokenizer.cdataSectionState)N)NF)N__name__
__module____qualname____doc__r"   r2   rO   re   rf   rp   r   rt   r|   rz   r   r   r   ru   r   r   r{   r   r   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r	  r
  r   r  r  r  r  r  r   __classcell__r'   r'   r%   r(   r      s   H
P#

6 "-3r   N) 
__future__r   r   r   Zpip._vendor.sixr   rF   collectionsr   r   sysr   	constantsr
   r   r   r   r   r   r   r   r   r   _inputstreamr   _trier   rY   dictrl   objectr   r'   r'   r'   r(   <module>   s    