403Webshell
Server IP : 192.158.238.246  /  Your IP : 18.220.224.115
Web Server : LiteSpeed
System : Linux uniform.iwebfusion.net 4.18.0-553.27.1.lve.1.el8.x86_64 #1 SMP Wed Nov 20 15:58:00 UTC 2024 x86_64
User : jenniferflocom ( 1321)
PHP Version : 8.1.32
Disable Function : NONE
MySQL : OFF  |  cURL : ON  |  WGET : ON  |  Perl : ON  |  Python : ON  |  Sudo : OFF  |  Pkexec : OFF
Directory :  /opt/alt/python37/lib/python3.7/site-packages/html5lib/__pycache__/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Command :


[ Back ]     

Current File : /opt/alt/python37/lib/python3.7/site-packages/html5lib/__pycache__/_tokenizer.cpython-37.pyc
B

63)Z+�@s�ddlmZmZmZddlmZddlmZddl	m
Z
ddl	mZddl	mZm
Z
ddl	mZmZmZdd	l	mZmZdd
l	mZddlmZddlmZee�ZGd
d�de�ZdS)�)�absolute_import�division�unicode_literals)�unichr)�deque�)�spaceCharacters)�entities)�asciiLetters�asciiUpper2Lower)�digits�	hexDigits�EOF)�
tokenTypes�
tagTokenTypes)�replacementCharacters)�HTMLInputStream)�TriecsdeZdZdZd��fdd�	Zdd�Zdd�Zd�d
d�Zdd
�Zdd�Z	dd�Z
dd�Zdd�Zdd�Z
dd�Zdd�Zdd�Zdd�Zd d!�Zd"d#�Zd$d%�Zd&d'�Zd(d)�Zd*d+�Zd,d-�Zd.d/�Zd0d1�Zd2d3�Zd4d5�Zd6d7�Zd8d9�Zd:d;�Zd<d=�Z d>d?�Z!d@dA�Z"dBdC�Z#dDdE�Z$dFdG�Z%dHdI�Z&dJdK�Z'dLdM�Z(dNdO�Z)dPdQ�Z*dRdS�Z+dTdU�Z,dVdW�Z-dXdY�Z.dZd[�Z/d\d]�Z0d^d_�Z1d`da�Z2dbdc�Z3ddde�Z4dfdg�Z5dhdi�Z6djdk�Z7dldm�Z8dndo�Z9dpdq�Z:drds�Z;dtdu�Z<dvdw�Z=dxdy�Z>dzd{�Z?d|d}�Z@d~d�ZAd�d��ZBd�d��ZCd�d��ZDd�d��ZEd�d��ZFd�d��ZGd�d��ZHd�d��ZId�d��ZJd�d��ZKd�d��ZL�ZMS)��
HTMLTokenizera	 This class takes care of tokenizing HTML.

    * self.currentToken
      Holds the token that is currently being processed.

    * self.state
      Holds a reference to the method to be invoked... XXX

    * self.stream
      Points to HTMLInputStream object.
    NcsFt|f|�|_||_d|_g|_|j|_d|_d|_t	t
|���dS)NF)r�stream�parserZ
escapeFlagZ
lastFourChars�	dataState�state�escape�currentToken�superr�__init__)�selfrr�kwargs)�	__class__��D/opt/alt/python37/lib/python3.7/site-packages/html5lib/_tokenizer.pyr"szHTMLTokenizer.__init__ccs\tg�|_xL|��rVx&|jjr:td|jj�d�d�VqWx|jrR|j��Vq>WqWdS)z� This is where the magic happens.

        We do our usually processing through the states and when we have a token
        to return we yield the token which pauses processing until the next token
        is requested.
        �
ParseErrorr)�type�dataN)r�
tokenQueuerr�errorsr�pop�popleft)rr r r!�__iter__1s


zHTMLTokenizer.__iter__c	%Cst}d}|rt}d}g}|j��}x(||krJ|tk	rJ|�|�|j��}q$Wtd�|�|�}|tkr�t|}|j	�t
ddd|id���nbd|kr�d	ks�n|d
kr�d}|j	�t
ddd|id���n d|kr�d
k�s�nd|kr�dk�s�nd|k�rdk�s�nd|k�r0dk�s�n|tddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d
g#�k�r�|j	�t
ddd|id��yt|�}Wn>t
k
�r�|d6}td|d?B�td7|d8@B�}YnX|d9k�r|j	�t
dd:d;��|j�|�|S)<z�This function returns either U+FFFD or the character based on the
        decimal or hexadecimal representation. It also discards ";" if present.
        If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
        �
��r"z$illegal-codepoint-for-numeric-entity�	charAsInt)r#r$�datavarsi�i��i��u�r�����i�i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��	i��	i��
i��
i��i��i��i��i��
i��
i��i��i��i��i��ii�i��;z numeric-entity-without-semicolon)r#r$)rr
r�charr�append�int�joinrr%r�	frozenset�chr�
ValueError�unget)	rZisHexZallowed�radix�	charStack�cr-r6�vr r r!�consumeNumberEntityAs`

&

z!HTMLTokenizer.consumeNumberEntityFc	Cs�d}|j��g}|dtksB|dtddfksB|dk	rV||dkrV|j�|d��n |ddk�rd}|�|j���|ddkr�d}|�|j���|r�|dtks�|s�|dtkr�|j�|d�|�|�}n4|j	�t
d	d
d��|j�|���dd�|�}�njx8|dtk	�rDt
�d�|���s0P|�|j����qWy$t
�d�|dd���}t|�}Wntk
�r�d}YnX|dk	�rB|dd
k�r�|j	�t
d	dd��|dd
k�r|�r||tk�s�||tk�s�||dk�r|j�|���dd�|�}n.t|}|j�|���|d�||d��7}n4|j	�t
d	dd��|j�|���dd�|�}|�r�|jddd|7<n*|tk�r�d}nd}|j	�t
||d��dS)N�&r�<�#F���)�x�XTr"zexpected-numeric-entity)r#r$r,r5znamed-entity-without-semicolon�=zexpected-named-entityr$r�SpaceCharacters�
Characters)rr6rrr=r7r
rrBr%rr'r9�entitiesTrieZhas_keys_with_prefixZlongest_prefix�len�KeyErrorr
r	r)	r�allowedChar�
fromAttribute�outputr?�hexZ
entityNameZentityLengthZ	tokenTyper r r!�
consumeEntity�sf





zHTMLTokenizer.consumeEntitycCs|j|dd�dS)zIThis method replaces the need for "entityInAttributeValueState".
        T)rOrPN)rS)rrOr r r!�processEntityInAttribute�sz&HTMLTokenizer.processEntityInAttributecCs�|j}|dtkrp|d�t�|d<|dtdkrp|drR|j�tddd��|drp|j�tdd	d��|j�|�|j|_d
S)z�This method is a generic handler for emitting the tags. It also sets
        the state to "data" because that's what's needed after a token has been
        emitted.
        r#�name�EndTagr$r"zattributes-in-end-tag)r#r$�selfClosingzself-closing-flag-on-end-tagN)	rr�	translaterrr%r7rr)r�tokenr r r!�emitCurrentToken�s

zHTMLTokenizer.emitCurrentTokencCs�|j��}|dkr|j|_n�|dkr.|j|_n�|dkrd|j�tddd��|j�tddd��n`|tkrpdS|t	kr�|j�td	||j�
t	d
�d��n&|j�
d�}|j�td||d��d
S)NrCrD�r"zinvalid-codepoint)r#r$rKFrJT)rCrDr[)rr6�entityDataStater�tagOpenStater%r7rrr�
charsUntil)rr$�charsr r r!r�s&



zHTMLTokenizer.dataStatecCs|��|j|_dS)NT)rSrr)rr r r!r\szHTMLTokenizer.entityDataStatecCs�|j��}|dkr|j|_n�|dkr.|j|_n�|tkr:dS|dkrp|j�tddd��|j�tdd	d��nT|t	kr�|j�td
||j�
t	d�d��n&|j�
d�}|j�td||d��dS)
NrCrDFr[r"zinvalid-codepoint)r#r$rKu�rJT)rCrDr[)rr6�characterReferenceInRcdatar�rcdataLessThanSignStaterr%r7rrr^)rr$r_r r r!�rcdataStates&



zHTMLTokenizer.rcdataStatecCs|��|j|_dS)NT)rSrbr)rr r r!r`1sz(HTMLTokenizer.characterReferenceInRcdatacCs�|j��}|dkr|j|_nh|dkrR|j�tddd��|j�tddd��n2|tkr^dS|j�d	�}|j�td||d��d
S)NrDr[r"zinvalid-codepoint)r#r$rKu�F)rDr[T)	rr6�rawtextLessThanSignStaterr%r7rrr^)rr$r_r r r!�rawtextState6s


zHTMLTokenizer.rawtextStatecCs�|j��}|dkr|j|_nh|dkrR|j�tddd��|j�tddd��n2|tkr^dS|j�d	�}|j�td||d��d
S)NrDr[r"zinvalid-codepoint)r#r$rKu�F)rDr[T)	rr6�scriptDataLessThanSignStaterr%r7rrr^)rr$r_r r r!�scriptDataStateHs


zHTMLTokenizer.scriptDataStatecCsr|j��}|tkrdS|dkrL|j�tddd��|j�tddd��n"|j�td||j�d�d��dS)	NFr[r"zinvalid-codepoint)r#r$rKu�T)rr6rr%r7rr^)rr$r r r!�plaintextStateZs

zHTMLTokenizer.plaintextStatecCs|j��}|dkr|j|_n�|dkr.|j|_n�|tkrVtd|gddd�|_|j|_n�|dkr�|j	�
tddd	��|j	�
td
dd	��|j|_nt|dkr�|j	�
tdd
d	��|j�|�|j
|_n@|j	�
tddd	��|j	�
td
dd	��|j�|�|j|_dS)N�!�/ZStartTagF)r#rUr$rWZselfClosingAcknowledged�>r"z'expected-tag-name-but-got-right-bracket)r#r$rKz<>�?z'expected-tag-name-but-got-question-markzexpected-tag-namerDT)rr6�markupDeclarationOpenStater�closeTagOpenStater
rr�tagNameStater%r7rr=�bogusCommentState)rr$r r r!r]is6









zHTMLTokenizer.tagOpenStatecCs�|j��}|tkr0td|gdd�|_|j|_n�|dkrX|j�tddd��|j	|_nn|t
kr�|j�tddd��|j�td	d
d��|j	|_n0|j�tddd|id
��|j�|�|j|_dS)NrVF)r#rUr$rWrjr"z*expected-closing-tag-but-got-right-bracket)r#r$z expected-closing-tag-but-got-eofrKz</z!expected-closing-tag-but-got-charr$)r#r$r.T)
rr6r
rrrnrr%r7rrr=ro)rr$r r r!rm�s(





zHTMLTokenizer.closeTagOpenStatecCs�|j��}|tkr|j|_n�|dkr.|��n~|tkrV|j�t	ddd��|j
|_nV|dkrh|j|_nD|dkr�|j�t	ddd��|jdd	7<n|jd|7<d
S)Nrjr"zeof-in-tag-name)r#r$rir[zinvalid-codepointrUu�T)
rr6r�beforeAttributeNameStaterrZrr%r7rr�selfClosingStartTagStater)rr$r r r!rn�s"






zHTMLTokenizer.tagNameStatecCsP|j��}|dkr"d|_|j|_n*|j�tddd��|j�|�|j	|_dS)Nrir,rKrD)r#r$T)
rr6�temporaryBuffer�rcdataEndTagOpenStaterr%r7rr=rb)rr$r r r!ra�s

z%HTMLTokenizer.rcdataLessThanSignStatecCsX|j��}|tkr*|j|7_|j|_n*|j�tddd��|j�	|�|j
|_dS)NrKz</)r#r$T)rr6r
rr�rcdataEndTagNameStaterr%r7rr=rb)rr$r r r!rs�s

z#HTMLTokenizer.rcdataEndTagOpenStatecCs|jo|jd��|j��k}|j��}|tkrT|rTtd|jgdd�|_|j|_n�|dkr�|r�td|jgdd�|_|j	|_n||dkr�|r�td|jgdd�|_|�
�|j|_nH|tkr�|j|7_n0|j
�tdd|jd	��|j�|�|j|_d
S)NrUrVF)r#rUr$rWrirjrKz</)r#r$T)r�lowerrrrr6rrrprrqrZrr
r%r7r=rb)r�appropriater$r r r!rt�s2



z#HTMLTokenizer.rcdataEndTagNameStatecCsP|j��}|dkr"d|_|j|_n*|j�tddd��|j�|�|j	|_dS)Nrir,rKrD)r#r$T)
rr6rr�rawtextEndTagOpenStaterr%r7rr=rd)rr$r r r!rc�s

z&HTMLTokenizer.rawtextLessThanSignStatecCsX|j��}|tkr*|j|7_|j|_n*|j�tddd��|j�	|�|j
|_dS)NrKz</)r#r$T)rr6r
rr�rawtextEndTagNameStaterr%r7rr=rd)rr$r r r!rw�s

z$HTMLTokenizer.rawtextEndTagOpenStatecCs|jo|jd��|j��k}|j��}|tkrT|rTtd|jgdd�|_|j|_n�|dkr�|r�td|jgdd�|_|j	|_n||dkr�|r�td|jgdd�|_|�
�|j|_nH|tkr�|j|7_n0|j
�tdd|jd	��|j�|�|j|_d
S)NrUrVF)r#rUr$rWrirjrKz</)r#r$T)rrurrrr6rrrprrqrZrr
r%r7r=rd)rrvr$r r r!rxs2



z$HTMLTokenizer.rawtextEndTagNameStatecCsx|j��}|dkr"d|_|j|_nR|dkrJ|j�tddd��|j|_n*|j�tddd��|j�	|�|j
|_dS)	Nrir,rhrKz<!)r#r$rDT)rr6rr�scriptDataEndTagOpenStaterr%r7r�scriptDataEscapeStartStater=rf)rr$r r r!res


z)HTMLTokenizer.scriptDataLessThanSignStatecCsX|j��}|tkr*|j|7_|j|_n*|j�tddd��|j�	|�|j
|_dS)NrKz</)r#r$T)rr6r
rr�scriptDataEndTagNameStaterr%r7rr=rf)rr$r r r!ry,s

z'HTMLTokenizer.scriptDataEndTagOpenStatecCs|jo|jd��|j��k}|j��}|tkrT|rTtd|jgdd�|_|j|_n�|dkr�|r�td|jgdd�|_|j	|_n||dkr�|r�td|jgdd�|_|�
�|j|_nH|tkr�|j|7_n0|j
�tdd|jd	��|j�|�|j|_d
S)NrUrVF)r#rUr$rWrirjrKz</)r#r$T)rrurrrr6rrrprrqrZrr
r%r7r=rf)rrvr$r r r!r{7s2



z'HTMLTokenizer.scriptDataEndTagNameStatecCsJ|j��}|dkr2|j�tddd��|j|_n|j�|�|j|_dS)N�-rK)r#r$T)	rr6r%r7r�scriptDataEscapeStartDashStaterr=rf)rr$r r r!rzSs

z(HTMLTokenizer.scriptDataEscapeStartStatecCsJ|j��}|dkr2|j�tddd��|j|_n|j�|�|j|_dS)Nr|rK)r#r$T)	rr6r%r7r�scriptDataEscapedDashDashStaterr=rf)rr$r r r!r}]s

z,HTMLTokenizer.scriptDataEscapeStartDashStatecCs�|j��}|dkr2|j�tddd��|j|_n�|dkrD|j|_nn|dkrz|j�tddd��|j�tddd��n8|tkr�|j	|_n&|j�
d	�}|j�td||d��d
S)Nr|rK)r#r$rDr[r"zinvalid-codepointu�)rDr|r[T)rr6r%r7r�scriptDataEscapedDashStater�"scriptDataEscapedLessThanSignStaterrr^)rr$r_r r r!�scriptDataEscapedStategs"




z$HTMLTokenizer.scriptDataEscapedStatecCs�|j��}|dkr2|j�tddd��|j|_n�|dkrD|j|_nn|dkr�|j�tddd��|j�tddd��|j|_n0|t	kr�|j
|_n|j�td|d��|j|_d	S)
Nr|rK)r#r$rDr[r"zinvalid-codepointu�T)rr6r%r7rr~rr�r�rr)rr$r r r!r{s"






z(HTMLTokenizer.scriptDataEscapedDashStatecCs�|j��}|dkr*|j�tddd��n�|dkr<|j|_n�|dkrd|j�tddd��|j|_nn|dkr�|j�tddd��|j�tdd	d��|j|_n0|t	kr�|j
|_n|j�td|d��|j|_d
S)Nr|rK)r#r$rDrjr[r"zinvalid-codepointu�T)rr6r%r7rr�rrfr�rr)rr$r r r!r~�s&






z,HTMLTokenizer.scriptDataEscapedDashDashStatecCs�|j��}|dkr"d|_|j|_n\|tkrT|j�tdd|d��||_|j	|_n*|j�tddd��|j�
|�|j|_dS)Nrir,rKrD)r#r$T)rr6rr� scriptDataEscapedEndTagOpenStaterr
r%r7r� scriptDataDoubleEscapeStartStater=r�)rr$r r r!r��s


z0HTMLTokenizer.scriptDataEscapedLessThanSignStatecCsP|j��}|tkr"||_|j|_n*|j�tddd��|j�	|�|j
|_dS)NrKz</)r#r$T)rr6r
rr� scriptDataEscapedEndTagNameStaterr%r7rr=r�)rr$r r r!r��s

z.HTMLTokenizer.scriptDataEscapedEndTagOpenStatecCs|jo|jd��|j��k}|j��}|tkrT|rTtd|jgdd�|_|j|_n�|dkr�|r�td|jgdd�|_|j	|_n||dkr�|r�td|jgdd�|_|�
�|j|_nH|tkr�|j|7_n0|j
�tdd|jd	��|j�|�|j|_d
S)NrUrVF)r#rUr$rWrirjrKz</)r#r$T)rrurrrr6rrrprrqrZrr
r%r7r=r�)rrvr$r r r!r��s2



z.HTMLTokenizer.scriptDataEscapedEndTagNameStatecCs�|j��}|ttd�BkrR|j�td|d��|j��dkrH|j	|_
q�|j|_
nB|tkr�|j�td|d��|j|7_n|j�
|�|j|_
dS)N)rirjrK)r#r$�scriptT)rr6rr:r%r7rrrru�scriptDataDoubleEscapedStaterr�r
r=)rr$r r r!r��s


z.HTMLTokenizer.scriptDataDoubleEscapeStartStatecCs�|j��}|dkr2|j�tddd��|j|_n�|dkrZ|j�tddd��|j|_nt|dkr�|j�tddd��|j�tddd��n>|tkr�|j�tdd	d��|j	|_n|j�td|d��d
S)Nr|rK)r#r$rDr[r"zinvalid-codepointu�zeof-in-script-in-scriptT)
rr6r%r7r� scriptDataDoubleEscapedDashStater�(scriptDataDoubleEscapedLessThanSignStaterr)rr$r r r!r��s$





z*HTMLTokenizer.scriptDataDoubleEscapedStatecCs�|j��}|dkr2|j�tddd��|j|_n�|dkrZ|j�tddd��|j|_n�|dkr�|j�tddd��|j�tddd��|j|_nF|t	kr�|j�tdd	d��|j
|_n|j�td|d��|j|_d
S)Nr|rK)r#r$rDr[r"zinvalid-codepointu�zeof-in-script-in-scriptT)rr6r%r7r�$scriptDataDoubleEscapedDashDashStaterr�r�rr)rr$r r r!r�s(







z.HTMLTokenizer.scriptDataDoubleEscapedDashStatecCs|j��}|dkr*|j�tddd��n�|dkrR|j�tddd��|j|_n�|dkrz|j�tddd��|j|_n�|dkr�|j�tddd��|j�tdd	d��|j|_nF|t	kr�|j�tdd
d��|j
|_n|j�td|d��|j|_dS)Nr|rK)r#r$rDrjr[r"zinvalid-codepointu�zeof-in-script-in-scriptT)rr6r%r7rr�rrfr�rr)rr$r r r!r�s,







z2HTMLTokenizer.scriptDataDoubleEscapedDashDashStatecCsP|j��}|dkr8|j�tddd��d|_|j|_n|j�|�|j	|_dS)NrirK)r#r$r,T)
rr6r%r7rrr�scriptDataDoubleEscapeEndStaterr=r�)rr$r r r!r�0s

z6HTMLTokenizer.scriptDataDoubleEscapedLessThanSignStatecCs�|j��}|ttd�BkrR|j�td|d��|j��dkrH|j	|_
q�|j|_
nB|tkr�|j�td|d��|j|7_n|j�
|�|j|_
dS)N)rirjrK)r#r$r�T)rr6rr:r%r7rrrrur�rr�r
r=)rr$r r r!r�;s


z,HTMLTokenizer.scriptDataDoubleEscapeEndStatecCs0|j��}|tkr$|j�td��n|tkrJ|jd�|dg�|j|_n�|dkr\|�	�n�|dkrn|j
|_n�|dkr�|j�tddd	��|jd�|dg�|j|_n�|d
kr�|j�tddd	��|jd�ddg�|j|_nF|t
k�r|j�tdd
d	��|j|_n|jd�|dg�|j|_dS)NTr$r,rjri)�'�"rIrDr"z#invalid-character-in-attribute-name)r#r$r[zinvalid-codepointu�z#expected-attribute-name-but-got-eof)rr6rr^r
rr7�attributeNameStaterrZrqr%rrr)rr$r r r!rpKs6










z&HTMLTokenizer.beforeAttributeNameStatecCs�|j��}d}d}|dkr&|j|_�n.|tkr\|jddd||j�td�7<d}n�|dkrjd}n�|tkr||j|_n�|dkr�|j	|_n�|d	kr�|j
�td
dd��|jdddd
7<d}n�|dk�r|j
�td
dd��|jddd|7<d}nH|t
k�r6|j
�td
dd��|j|_n|jddd|7<d}|�r�|jddd�t�|jddd<xP|jddd�D]:\}}|jddd|k�r�|j
�td
dd��P�q�W|�r�|��dS)NTFrIr$rFrrjrir[r"zinvalid-codepoint)r#r$u�)r�r�rDz#invalid-character-in-attribute-namezeof-in-attribute-namezduplicate-attribute)rr6�beforeAttributeValueStaterr
rr^r�afterAttributeNameStaterqr%r7rrrrXrrZ)rr$ZleavingThisStateZ	emitTokenrU�_r r r!r�isR








&
z HTMLTokenizer.attributeNameStatecCsD|j��}|tkr$|j�td��n|dkr8|j|_�n|dkrJ|��n�|tkrp|jd�	|dg�|j
|_n�|dkr�|j|_n�|dkr�|j�	t
dd	d
��|jd�	ddg�|j
|_n�|dkr�|j�	t
dd
d
��|jd�	|dg�|j
|_nF|tk�r$|j�	t
ddd
��|j|_n|jd�	|dg�|j
|_dS)NTrIrjr$r,rir[r"zinvalid-codepoint)r#r$u�)r�r�rDz&invalid-character-after-attribute-namezexpected-end-of-tag-but-got-eof)rr6rr^r�rrZr
rr7r�rqr%rrr)rr$r r r!r��s:










z%HTMLTokenizer.afterAttributeNameStatecCsh|j��}|tkr$|j�td��n@|dkr8|j|_�n,|dkrX|j|_|j�|��n|dkrj|j|_n�|dkr�|j	�
tddd��|��n�|d	kr�|j	�
tdd
d��|j
ddd
d7<|j|_n�|dk�r|j	�
tddd��|j
ddd
|7<|j|_nL|tk�rB|j	�
tddd��|j|_n"|j
ddd
|7<|j|_dS)NTr�rCr�rjr"z.expected-attribute-value-but-got-right-bracket)r#r$r[zinvalid-codepointr$rFru�)rIrD�`z"equals-in-unquoted-attribute-valuez$expected-attribute-value-but-got-eof)rr6rr^�attributeValueDoubleQuotedStater�attributeValueUnQuotedStater=�attributeValueSingleQuotedStater%r7rrZrrr)rr$r r r!r��s>











z'HTMLTokenizer.beforeAttributeValueStatecCs�|j��}|dkr|j|_n�|dkr0|�d�n�|dkrj|j�tddd��|jddd	d
7<nN|t	kr�|j�tddd��|j
|_n&|jddd	||j�d�7<d
S)Nr�rCr[r"zinvalid-codepoint)r#r$r$rFru�z#eof-in-attribute-value-double-quote)r�rCr[T)rr6�afterAttributeValueStaterrTr%r7rrrrr^)rr$r r r!r��s 




z-HTMLTokenizer.attributeValueDoubleQuotedStatecCs�|j��}|dkr|j|_n�|dkr0|�d�n�|dkrj|j�tddd��|jddd	d
7<nN|t	kr�|j�tddd��|j
|_n&|jddd	||j�d�7<d
S)Nr�rCr[r"zinvalid-codepoint)r#r$r$rFru�z#eof-in-attribute-value-single-quote)r�rCr[T)rr6r�rrTr%r7rrrrr^)rr$r r r!r��s 




z-HTMLTokenizer.attributeValueSingleQuotedStatecCs|j��}|tkr|j|_n�|dkr0|�d�n�|dkrB|��n�|dkr||j�t	ddd��|j
ddd	|7<n�|d
kr�|j�t	ddd��|j
ddd	d7<nV|tkr�|j�t	dd
d��|j|_n.|j
ddd	||j�
td�tB�7<dS)NrCrj)r�r�rIrDr�r"z0unexpected-character-in-unquoted-attribute-value)r#r$r$rFrr[zinvalid-codepointu�z eof-in-attribute-value-no-quotes)rCrjr�r�rIrDr�r[T)rr6rrprrTrZr%r7rrrrr^r:)rr$r r r!r�s,






z)HTMLTokenizer.attributeValueUnQuotedStatecCs�|j��}|tkr|j|_n�|dkr.|��np|dkr@|j|_n^|tkrt|j�	t
ddd��|j�|�|j|_n*|j�	t
ddd��|j�|�|j|_dS)Nrjrir"z$unexpected-EOF-after-attribute-value)r#r$z*unexpected-character-after-attribute-valueT)
rr6rrprrZrqrr%r7rr=r)rr$r r r!r� s"






z&HTMLTokenizer.afterAttributeValueStatecCs�|j��}|dkr&d|jd<|��n^|tkrZ|j�tddd��|j�|�|j	|_
n*|j�tddd��|j�|�|j|_
dS)NrjTrWr"z#unexpected-EOF-after-solidus-in-tag)r#r$z)unexpected-character-after-solidus-in-tag)rr6rrZrr%r7rr=rrrp)rr$r r r!rq4s





z&HTMLTokenizer.selfClosingStartTagStatecCsD|j�d�}|�dd�}|j�td|d��|j��|j|_dS)Nrjr[u��Comment)r#r$T)	rr^�replacer%r7rr6rr)rr$r r r!roFs
zHTMLTokenizer.bogusCommentStatecCs�|j��g}|ddkrR|�|j���|ddkrPtddd�|_|j|_dSn�|ddkr�d}x.dD]&}|�|j���|d|krhd	}PqhW|r�td
ddddd�|_|j|_dSn�|ddk�rF|jdk	�rF|jj	j
�rF|jj	j
dj|jj	jk�rFd}x2d
D]*}|�|j���|d|k�rd	}P�qW|�rF|j
|_dS|j�tddd��x|�rx|j�|����q^W|j|_dS)NrFr|r�r,)r#r$T)�d�D))�o�O)r@�C)�t�T)�y�Y)�p�P)�e�EFZDoctype)r#rU�publicId�systemId�correct�[)r�r��Ar�r�r�r"zexpected-dashes-or-doctype)rr6r7rr�commentStartStater�doctypeStaterZtreeZopenElements�	namespaceZdefaultNamespace�cdataSectionStater%r=r'ro)rr?�matched�expectedr r r!rlUsP



z(HTMLTokenizer.markupDeclarationOpenStatecCs�|j��}|dkr|j|_n�|dkrN|j�tddd��|jdd7<n�|dkr�|j�tdd	d��|j�|j�|j|_nP|t	kr�|j�tdd
d��|j�|j�|j|_n|jd|7<|j
|_dS)Nr|r[r"zinvalid-codepoint)r#r$r$u�rjzincorrect-commentzeof-in-commentT)rr6�commentStartDashStaterr%r7rrrr�commentState)rr$r r r!r��s(






zHTMLTokenizer.commentStartStatecCs�|j��}|dkr|j|_n�|dkrN|j�tddd��|jdd7<n�|dkr�|j�tdd	d��|j�|j�|j|_nT|t	kr�|j�tdd
d��|j�|j�|j|_n|jdd|7<|j
|_dS)Nr|r[r"zinvalid-codepoint)r#r$r$u-�rjzincorrect-commentzeof-in-commentT)rr6�commentEndStaterr%r7rrrrr�)rr$r r r!r��s(






z#HTMLTokenizer.commentStartDashStatecCs�|j��}|dkr|j|_n�|dkrN|j�tddd��|jdd7<nT|tkr�|j�tddd��|j�|j�|j	|_n|jd||j�
d	�7<d
S)Nr|r[r"zinvalid-codepoint)r#r$r$u�zeof-in-comment)r|r[T)rr6�commentEndDashStaterr%r7rrrrr^)rr$r r r!r��s




zHTMLTokenizer.commentStatecCs�|j��}|dkr|j|_n�|dkrV|j�tddd��|jdd7<|j|_nT|t	kr�|j�tddd��|j�|j�|j
|_n|jdd|7<|j|_d	S)
Nr|r[r"zinvalid-codepoint)r#r$r$u-�zeof-in-comment-end-dashT)rr6r�rr%r7rrr�rr)rr$r r r!r��s 





z!HTMLTokenizer.commentEndDashStatecCs,|j��}|dkr*|j�|j�|j|_n�|dkrd|j�tddd��|jdd7<|j|_n�|dkr�|j�tdd	d��|j	|_n�|d
kr�|j�tddd��|jd|7<nj|t
kr�|j�tddd��|j�|j�|j|_n4|j�tdd
d��|jdd|7<|j|_dS)Nrjr[r"zinvalid-codepoint)r#r$r$u--�rhz,unexpected-bang-after-double-dash-in-commentr|z,unexpected-dash-after-double-dash-in-commentzeof-in-comment-double-dashzunexpected-char-in-commentz--T)rr6r%r7rrrrr��commentEndBangStater)rr$r r r!r��s6









zHTMLTokenizer.commentEndStatecCs�|j��}|dkr*|j�|j�|j|_n�|dkrN|jdd7<|j|_n�|dkr�|j�tddd��|jdd	7<|j	|_nT|t
kr�|j�tdd
d��|j�|j�|j|_n|jdd|7<|j	|_dS)Nrjr|r$z--!r[r"zinvalid-codepoint)r#r$u--!�zeof-in-comment-end-bang-stateT)rr6r%r7rrrr�rr�r)rr$r r r!r��s(






z!HTMLTokenizer.commentEndBangStatecCs�|j��}|tkr|j|_nj|tkr\|j�tddd��d|j	d<|j�|j	�|j
|_n*|j�tddd��|j�|�|j|_dS)Nr"z!expected-doctype-name-but-got-eof)r#r$Fr�zneed-space-after-doctypeT)rr6r�beforeDoctypeNameStaterrr%r7rrrr=)rr$r r r!r�s





zHTMLTokenizer.doctypeStatecCs�|j��}|tkrn�|dkrT|j�tddd��d|jd<|j�|j�|j|_n�|dkr�|j�tddd��d	|jd
<|j	|_nR|t
kr�|j�tddd��d|jd<|j�|j�|j|_n||jd
<|j	|_dS)
Nrjr"z+expected-doctype-name-but-got-right-bracket)r#r$Fr�r[zinvalid-codepointu�rUz!expected-doctype-name-but-got-eofT)rr6rr%r7rrrr�doctypeNameStater)rr$r r r!r�s.










z$HTMLTokenizer.beforeDoctypeNameStatecCs|j��}|tkr2|jd�t�|jd<|j|_n�|dkrh|jd�t�|jd<|j�	|j�|j
|_n�|dkr�|j�	tddd��|jdd7<|j|_nh|t
kr�|j�	tddd��d	|jd
<|jd�t�|jd<|j�	|j�|j
|_n|jd|7<dS)NrUrjr[r"zinvalid-codepoint)r#r$u�zeof-in-doctype-nameFr�T)rr6rrrXr�afterDoctypeNameStaterr%r7rrr�r)rr$r r r!r�6s,







zHTMLTokenizer.doctypeNameStatecCsL|j��}|tkr�n2|dkr8|j�|j�|j|_�n|tkr�d|jd<|j�	|�|j�t
ddd��|j�|j�|j|_n�|dkr�d}x$d	D]}|j��}||kr�d}Pq�W|r�|j|_dSnF|d
k�rd}x$dD]}|j��}||kr�d}Pq�W|�r|j|_dS|j�	|�|j�t
ddd
|id��d|jd<|j
|_dS)NrjFr�r"zeof-in-doctype)r#r$)r�r�T))�u�U)�b�B)�l�L)�i�I)r@r�)�s�S))r�r�)r�r�)r�r�)r�r�)�m�Mz*expected-space-or-right-bracket-in-doctyper$)r#r$r.)rr6rr%r7rrrrr=r�afterDoctypePublicKeywordState�afterDoctypeSystemKeywordState�bogusDoctypeState)rr$r�r�r r r!r�OsP









z#HTMLTokenizer.afterDoctypeNameStatecCs�|j��}|tkr|j|_n�|dkrP|j�tddd��|j�|�|j|_nT|t	kr�|j�tddd��d|j
d<|j�|j
�|j|_n|j�|�|j|_dS)	N)r�r�r"zunexpected-char-in-doctype)r#r$zeof-in-doctypeFr�T)rr6r�"beforeDoctypePublicIdentifierStaterr%r7rr=rrr)rr$r r r!r��s"






z,HTMLTokenizer.afterDoctypePublicKeywordStatecCs�|j��}|tkrn�|dkr0d|jd<|j|_n�|dkrLd|jd<|j|_n�|dkr�|j�t	ddd��d	|jd
<|j�|j�|j
|_nh|tkr�|j�t	ddd��d	|jd
<|j�|j�|j
|_n(|j�t	ddd��d	|jd
<|j|_d
S)Nr�r,r�r�rjr"zunexpected-end-of-doctype)r#r$Fr�zeof-in-doctypezunexpected-char-in-doctypeT)
rr6rr�(doctypePublicIdentifierDoubleQuotedStater�(doctypePublicIdentifierSingleQuotedStater%r7rrrr�)rr$r r r!r��s4












z0HTMLTokenizer.beforeDoctypePublicIdentifierStatecCs�|j��}|dkr|j|_n�|dkrN|j�tddd��|jdd7<n�|dkr�|j�tdd	d��d
|jd<|j�|j�|j|_nR|t	kr�|j�tddd��d
|jd<|j�|j�|j|_n|jd|7<d
S)Nr�r[r"zinvalid-codepoint)r#r$r�u�rjzunexpected-end-of-doctypeFr�zeof-in-doctypeT)
rr6�!afterDoctypePublicIdentifierStaterr%r7rrrr)rr$r r r!r��s*








z6HTMLTokenizer.doctypePublicIdentifierDoubleQuotedStatecCs�|j��}|dkr|j|_n�|dkrN|j�tddd��|jdd7<n�|dkr�|j�tdd	d��d
|jd<|j�|j�|j|_nR|t	kr�|j�tddd��d
|jd<|j�|j�|j|_n|jd|7<d
S)Nr�r[r"zinvalid-codepoint)r#r$r�u�rjzunexpected-end-of-doctypeFr�zeof-in-doctypeT)
rr6r�rr%r7rrrr)rr$r r r!r��s*








z6HTMLTokenizer.doctypePublicIdentifierSingleQuotedStatecCs|j��}|tkr|j|_n�|dkr<|j�|j�|j|_n�|dkrn|j�t	ddd��d|jd<|j
|_n�|dkr�|j�t	ddd��d|jd<|j|_nh|tkr�|j�t	dd	d��d
|jd<|j�|j�|j|_n(|j�t	ddd��d
|jd<|j
|_dS)
Nrjr�r"zunexpected-char-in-doctype)r#r$r,r�r�zeof-in-doctypeFr�T)rr6r�-betweenDoctypePublicAndSystemIdentifiersStaterr%r7rrr�(doctypeSystemIdentifierDoubleQuotedState�(doctypeSystemIdentifierSingleQuotedStaterr�)rr$r r r!r��s6













z/HTMLTokenizer.afterDoctypePublicIdentifierStatecCs�|j��}|tkrn�|dkr4|j�|j�|j|_n�|dkrPd|jd<|j|_n�|dkrld|jd<|j	|_nh|t
kr�|j�tddd��d	|jd
<|j�|j�|j|_n(|j�tddd��d	|jd
<|j|_dS)
Nrjr�r,r�r�r"zeof-in-doctype)r#r$Fr�zunexpected-char-in-doctypeT)
rr6rr%r7rrrr�r�rrr�)rr$r r r!r�s.










z;HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersStatecCs�|j��}|tkr|j|_n�|dkrP|j�tddd��|j�|�|j|_nT|t	kr�|j�tddd��d|j
d<|j�|j
�|j|_n|j�|�|j|_dS)	N)r�r�r"zunexpected-char-in-doctype)r#r$zeof-in-doctypeFr�T)rr6r�"beforeDoctypeSystemIdentifierStaterr%r7rr=rrr)rr$r r r!r�s"






z,HTMLTokenizer.afterDoctypeSystemKeywordStatecCs�|j��}|tkrn�|dkr0d|jd<|j|_n�|dkrLd|jd<|j|_n�|dkr�|j�t	ddd��d	|jd
<|j�|j�|j
|_nh|tkr�|j�t	ddd��d	|jd
<|j�|j�|j
|_n(|j�t	ddd��d	|jd
<|j|_dS)
Nr�r,r�r�rjr"zunexpected-char-in-doctype)r#r$Fr�zeof-in-doctypeT)
rr6rrr�rr�r%r7rrrr�)rr$r r r!r�/s4












z0HTMLTokenizer.beforeDoctypeSystemIdentifierStatecCs�|j��}|dkr|j|_n�|dkrN|j�tddd��|jdd7<n�|dkr�|j�tdd	d��d
|jd<|j�|j�|j|_nR|t	kr�|j�tddd��d
|jd<|j�|j�|j|_n|jd|7<d
S)Nr�r[r"zinvalid-codepoint)r#r$r�u�rjzunexpected-end-of-doctypeFr�zeof-in-doctypeT)
rr6�!afterDoctypeSystemIdentifierStaterr%r7rrrr)rr$r r r!r�Ls*








z6HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedStatecCs�|j��}|dkr|j|_n�|dkrN|j�tddd��|jdd7<n�|dkr�|j�tdd	d��d
|jd<|j�|j�|j|_nR|t	kr�|j�tddd��d
|jd<|j�|j�|j|_n|jd|7<d
S)Nr�r[r"zinvalid-codepoint)r#r$r�u�rjzunexpected-end-of-doctypeFr�zeof-in-doctypeT)
rr6r�rr%r7rrrr)rr$r r r!r�ds*








z6HTMLTokenizer.doctypeSystemIdentifierSingleQuotedStatecCs�|j��}|tkrn~|dkr4|j�|j�|j|_n^|tkrt|j�t	ddd��d|jd<|j�|j�|j|_n|j�t	ddd��|j
|_dS)	Nrjr"zeof-in-doctype)r#r$Fr�zunexpected-char-in-doctypeT)rr6rr%r7rrrrrr�)rr$r r r!r�|s 





z/HTMLTokenizer.afterDoctypeSystemIdentifierStatecCsZ|j��}|dkr*|j�|j�|j|_n,|tkrV|j�|�|j�|j�|j|_ndS)NrjT)	rr6r%r7rrrrr=)rr$r r r!r��s


zHTMLTokenizer.bogusDoctypeStatecCs�g}x�|�|j�d��|�|j�d��|j��}|tkr@Pq|dksLt�|ddd�dkrx|ddd�|d<Pq|�|�qWd�|�}|�d�}|dkr�x&t|�D]}|j	�t
d	d
d��q�W|�dd�}|r�|j	�t
d
|d��|j|_
dS)N�]rjrF���z]]r,r[rr"zinvalid-codepoint)r#r$u�rKT)r7rr^r6r�AssertionErrorr9�count�ranger%rr�rr)rr$r6Z	nullCountr�r r r!r��s0



zHTMLTokenizer.cdataSectionState)N)NF)N�__name__�
__module__�__qualname__�__doc__rr)rBrSrTrZrr\rbr`rdrfrgr]rmrnrarsrtrcrwrxreryr{rzr}r�rr~r�r�r�r�r�r�r�r�r�rpr�r�r�r�r�r�r�rqrorlr�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r��
__classcell__r r )rr!rs�H
P#

6 "-3rN)�
__future__rrr�sixrr;�collectionsr�	constantsrr	r
rrr
rrrrZ_inputstreamrZ_trierrL�objectrr r r r!�<module>s

Youez - 2016 - github.com/yon3zu
LinuXploit