3
0dJ                 @   s  d Z dgZddlZddlZddlmZmZmZmZm	Z	 ddl
mZmZ ddlZddlmZmZ ddl
mZmZmZmZ yddlmZ d	ZW n2 ek
r Z zdd
lmZ dZW Y ddZ[X nX G dd de	ZG dd dejZG dd deZ G dd dej!Z"G dd de"Z#dS )MITHTML5TreeBuilder    N)DetectsXMLParsedAsHTML
PERMISSIVEHTMLHTML_5HTMLTreeBuilder)NamespacedAttributenonwhitespace_re)
namespacesprefixes)CommentDoctypeNavigableStringTag)_baseF)baseTc               @   sF   e Zd ZdZdZeeeegZdZ	dddZ
dd Zd	d
 Zdd ZdS )r   a  Use html5lib to build a tree.

    Note that this TreeBuilder does not support some features common
    to HTML TreeBuilders. Some of these features could theoretically
    be implemented, but at the very least it's quite difficult,
    because html5lib moves the parse tree around as it's being built.

    * This TreeBuilder doesn't use different subclasses of NavigableString
      based on the name of the tag in which the string was found.

    * You can't use a SoupStrainer to parse only part of a document.
    html5libTNc             c   s4   || _ |rtjddd tj| |d d dfV  d S )NzjYou provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.   )
stacklevelF)user_specified_encodingwarningswarnr   Zwarn_if_markup_looks_like_xml)selfmarkupr   Zdocument_declared_encodingZexclude_encodings r   G/var/www/html/virt/lib/python3.6/site-packages/bs4/builder/_html5lib.pyprepare_markup@   s    
zHTML5TreeBuilder.prepare_markupc             C   s   | j jd k	rtjddd tj| jd}|| j_t	 }t
|tsZtrP| j|d< n
| j|d< |j|f|}t
|trzd |_n$|jjjd }t
|ts|j}||_d | j_d S )NzYou provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.   )r   )treeoverride_encodingencodingr   )soupZ
parse_onlyr   r   r   
HTMLParsercreate_treebuilderunderlying_builderparserdict
isinstancestrnew_html5libr   parseoriginal_encoding	tokenizerstreamcharEncodingname)r   r   r&   extra_kwargsdocr,   r   r   r   feedU   s&    



zHTML5TreeBuilder.feedc             C   s   t || j| jd| _| jS )N)store_line_numbers)TreeBuilderForHtml5libr"   r4   r%   )r   namespaceHTMLElementsr   r   r   r$   t   s    z#HTML5TreeBuilder.create_treebuilderc             C   s   d| S )zSee `TreeBuilder`.z)<html><head></head><body>%s</body></html>r   )r   fragmentr   r   r   test_fragment_to_document{   s    z*HTML5TreeBuilder.test_fragment_to_document)NN)__name__
__module____qualname____doc__NAMEr   r   r   featuresZTRACKS_LINE_NUMBERSr   r3   r$   r8   r   r   r   r   r   *   s   
c                   sf   e Zd Zd fdd	Zdd Zdd Zd	d
 Zdd Zdd Zdd Z	dd Z
dd Zdd Z  ZS )r5   NTc                sL   |r|| _ n ddlm} |dd|i|| _ tt| j| d | _|| _d S )Nr   )BeautifulSoup html.parserr4   )r@   rA   )r"   bs4r?   superr5   __init__r&   r4   )r   r6   r"   r4   kwargsr?   )	__class__r   r   rD      s    
zTreeBuilderForHtml5lib.__init__c             C   s   | j j  t| j | j d S )N)r"   resetElement)r   r   r   r   documentClass   s    
z$TreeBuilderForHtml5lib.documentClassc             C   s6   |d }|d }|d }t j|||}| jj| d S )Nr0   publicIdsystemId)r   Zfor_name_and_idsr"   object_was_parsed)r   tokenr0   rJ   rK   doctyper   r   r   insertDoctype   s
    z$TreeBuilderForHtml5lib.insertDoctypec             C   sV   i }| j r6| jr6| j jjj \}}||d< |d |d< | jj||f|}t|| j|S )N
sourceline   	sourcepos)r&   r4   r-   r.   positionr"   new_tagrH   )r   r0   	namespacerE   rP   rR   tagr   r   r   elementClass   s    z#TreeBuilderForHtml5lib.elementClassc             C   s   t t|| jS )N)TextNoder   r"   )r   datar   r   r   commentClass   s    z#TreeBuilderForHtml5lib.commentClassc             C   s0   ddl m} |dd| _d| j_t| j| jd S )Nr   )r?   r@   zhtml.parserz[document_fragment])rB   r?   r"   r0   rH   )r   r?   r   r   r   fragmentClass   s    z$TreeBuilderForHtml5lib.fragmentClassc             C   s   | j j|j d S )N)r"   appendelement)r   noder   r   r   appendChild   s    z"TreeBuilderForHtml5lib.appendChildc             C   s   | j S )N)r"   )r   r   r   r   getDocument   s    z"TreeBuilderForHtml5lib.getDocumentc             C   s   t jj| jS )N)treebuilder_baseTreeBuildergetFragmentr]   )r   r   r   r   rc      s    z"TreeBuilderForHtml5lib.getFragmentc                sB   ddl m  g tjdd fdd	|d djS )Nr   )r?   z8^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$c       	         s  t |  r
t | trj| }|r|jd}|jdkrx|jdpBd}|jdpZ|jdpZd}jdd| |||f  qjdd| |f  njd	d| f  nHt | tr̈jd
d| | f  n$t | trjdd| | f  n | jrdt	| j | j
f }n| j
}jdd| |f  | jrg }x`t| jj D ]N\}}t |trndt	|j |j
f }t |trdj|}|j||f qFW x2t|D ]&\}}jdd|d  ||f  qW |d7 }x| jD ]}|| qW d S )NrQ      r@   r   r   z|%s<!DOCTYPE %s "%s" "%s"> z|%s<!DOCTYPE %s>z|%s<!DOCTYPE >z|%s<!-- %s -->z|%s"%s"z%s %sz|%s<%s>z
|%s%s="%s")r(   r   matchgroup	lastindexr\   r   r   rU   r   r0   attrslistitemsr	   joinsortedchildren)	r]   indentmr0   rJ   rK   
attributesvaluechild)r?   
doctype_rervserializeElementr   r   rv      sD    








"z?TreeBuilderForHtml5lib.testSerializer.<locals>.serializeElement
)r   )rB   r?   recompilerl   )r   r]   r   )r?   rt   ru   rv   r   testSerializer   s    
)
z%TreeBuilderForHtml5lib.testSerializer)NT)r9   r:   r;   rD   rI   rO   rW   rZ   r[   r_   r`   rc   rz   __classcell__r   r   )rF   r   r5      s    r5   c               @   sL   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )AttrListc             C   s   || _ t| j j| _d S )N)r]   r'   ri   )r   r]   r   r   r   rD      s    zAttrList.__init__c             C   s   t | jj j S )N)rj   ri   rk   __iter__)r   r   r   r   r}      s    zAttrList.__iter__c             C   s^   | j jp
i }||jdg ks<| j j|krP||j| j jg krPt|tsPtj|}|| j |< d S )N*)r]   Zcdata_list_attributesgetr0   r(   rj   r
   findall)r   r0   rr   Z	list_attrr   r   r   __setitem__   s    

zAttrList.__setitem__c             C   s   t | jj S )N)rj   ri   rk   )r   r   r   r   rk   
  s    zAttrList.itemsc             C   s   t | jj S )N)rj   ri   keys)r   r   r   r   r     s    zAttrList.keysc             C   s
   t | jS )N)lenri   )r   r   r   r   __len__  s    zAttrList.__len__c             C   s
   | j | S )N)ri   )r   r0   r   r   r   __getitem__  s    zAttrList.__getitem__c             C   s   |t | jj kS )N)rj   ri   r   )r   r0   r   r   r   __contains__  s    zAttrList.__contains__N)r9   r:   r;   rD   r}   r   rk   r   r   r   r   r   r   r   r   r|      s   r|   c               @   sx   e Zd Zdd Zdd Zdd Zdd ZeeeZdd
dZ	dd Z
dd Zdd Zdd Zdd Zdd ZeeZd	S )rH   c             C   s&   t jj| |j || _|| _|| _d S )N)ra   NoderD   r0   r]   r"   rU   )r   r]   r"   rU   r   r   r   rD     s    zElement.__init__c             C   s,  d  }}t |tr| }}n:t |tr,|}n*|jjtkrJ|j }}| |_n|j}| |_t |t rv|jd k	rv|jj  |d k	r| jjr| jjd jtkr| jjd }| j	j
|| }|j| || j	_n`t |tr| j	j
|}| jjr| jjd}n | jjd k	r| j	j }n| j}| j	j|| j|d d S )NrQ   F)parentmost_recent_elementr   )r(   r)   r   r]   rF   r   r   extractcontentsr"   
new_stringreplace_withZ_most_recent_element_last_descendantnext_elementrL   )r   r^   Zstring_childrs   Zold_elementZnew_elementr   r   r   r   r_     s8    







zElement.appendChildc             C   s   t | jtri S t| jS )N)r(   r]   r   r|   )r   r   r   r   getAttributesS  s    zElement.getAttributesc             C   s   |d k	rt |dkrg }x8t|j D ](\}}t|tr&t| }||= |||< q&W | jjj| j	| x"t|j D ]\}}|| j
|< qrW | jjj| j
 d S )Nr   )r   rj   rk   r(   tupler	   r"   ZbuilderZ$_replace_cdata_list_attribute_valuesr0   r]   Zset_up_substitutions)r   rq   Zconverted_attributesr0   rr   new_namer   r   r   setAttributesX  s    

zElement.setAttributesNc             C   s4   t | jj|| j}|r&| j|| n
| j| d S )N)rX   r"   r   insertBeforer_   )r   rY   r   textr   r   r   
insertTextn  s    zElement.insertTextc             C   s   | j j|j }|j jtkrf| j jrf| j j|d  jtkrf| j j|d  }| jj||j  }|j| n| j j||j  | |_	d S )NrQ   )
r]   indexrF   r   r   r"   r   r   insertr   )r   r^   refNoder   old_nodeZnew_strr   r   r   r   u  s    zElement.insertBeforec             C   s   |j j  d S )N)r]   r   )r   r^   r   r   r   removeChild  s    zElement.removeChildc             C   s   | j }|j }|j}|jdd}t|jdkr>|jd }|j}n
d}|j}|j}t|dkr|d }	|dk	rr||	_n||	_||	_|dk	r|	|_n|	|_|dk	r|	|_|d jdd}
||
_|dk	r|
|_d|
_x|D ]}||_|jj	| qW g |_||_dS )z1Move all of this tag's children into another tag.Fr   rQ   NTr   r   )
r]   Znext_siblingr   r   r   r   Zprevious_elementZprevious_siblingr   r\   )r   Z
new_parentr]   Znew_parent_elementZfinal_next_elementZnew_parents_last_descendantZnew_parents_last_childZ(new_parents_last_descendant_next_elementZ	to_appendZfirst_childZlast_childs_last_descendantrs   r   r   r   reparentChildren  s>    

zElement.reparentChildrenc             C   sF   | j j| jj| j}t|| j | j}x| jD ]\}}||j|< q,W |S )N)r"   rT   r]   r0   rU   rH   rq   )r   rV   r^   keyrr   r   r   r   	cloneNode  s
    zElement.cloneNodec             C   s   | j jS )N)r]   r   )r   r   r   r   
hasContent  s    zElement.hasContentc             C   s(   | j d krtd | jfS | j | jfS d S )Nhtml)rU   r   r0   )r   r   r   r   getNameTuple  s    
zElement.getNameTuple)N)r9   r:   r;   rD   r_   r   r   propertyrq   r   r   r   r   r   r   r   	nameTupler   r   r   r   rH     s   6

BrH   c               @   s   e Zd Zdd Zdd ZdS )rX   c             C   s   t jj| d  || _|| _d S )N)ra   r   rD   r]   r"   )r   r]   r"   r   r   r   rD     s    zTextNode.__init__c             C   s   t d S )N)NotImplementedError)r   r   r   r   r     s    zTextNode.cloneNodeN)r9   r:   r;   rD   r   r   r   r   r   rX     s   rX   )$__license____all__r   rx   Zbs4.builderr   r   r   r   r   Zbs4.elementr	   r
   r   Zhtml5lib.constantsr   r   r   r   r   r   Zhtml5lib.treebuildersr   ra   r*   ImportErrorer   r   rb   r5   objectr|   r   rH   rX   r   r   r   r   <module>   s(   Vx C