3
d                  @   sh   d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	m
Z
 ejje ddG d	d
 d
e
e	ZdS )zDTests to ensure that the html5lib tree builder generates good trees.    N)BeautifulSoup)SoupStrainer   )HTML5LIB_PRESENTHTML5TreeBuilderSmokeTestSoupTestz?html5lib seems not to be present, not testing its tree builder.)reasonc               @   s   e Zd ZdZedd Zdd Zdd Zdd	 Zd
d Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"S )#TestHTML5LibBuilderz"See ``HTML5TreeBuilderSmokeTest``.c             C   s   ddl m} |S )Nr   )HTML5TreeBuilder)Zbs4.builderr
   )selfr
    r   A/tmp/pip-build-8z3xcdsh/beautifulsoup4/bs4/tests/test_html5lib.pydefault_builder   s    z#TestHTML5LibBuilder.default_builderc             C   sr   t d}d}tjdd}t|d|d}W d Q R X |j | j|ksHt|\}|jtks\tdt	|j
ksntd S )Nbz<p>A <b>bold</b> statement.</p>T)recordhtml5lib)Z
parse_onlyz4the html5lib tree builder doesn't support parse_only)r   warningscatch_warningsr   decodeZdocument_forAssertionErrorfilename__file__strmessage)r   Zstrainermarkupwsoupwarningr   r   r   test_soupstrainer   s    z%TestHTML5LibBuilder.test_soupstrainerc             C   s   d}| j |d | j d dS )z8html5lib inserts <tbody> tags where other parsers don't.z[<table id="1"><tr><td>Here's another table:<table id="2"><tr><td>foo</td></tr></table></td>z<table id="1"><tbody><tr><td>Here's another table:<table id="2"><tbody><tr><td>foo</td></tr></tbody></table></td></tr></tbody></table>z{<table><thead><tr><td>Foo</td></tr></thead><tbody><tr><td>Bar</td></tr></tbody><tfoot><tr><td>Baz</td></tr></tfoot></table>N)Zassert_soup)r   r   r   r   r   test_correctly_nested_tables&   s    z0TestHTML5LibBuilder.test_correctly_nested_tablesc             C   s$   d}| j |}d|jj ks td S )Nzy<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>
<html>
  <head>
  </head>
  <body>
   <p>foo</p>
  </body>
</html>s
   <p>foo</p>)r   pencoder   )r   r   r   r   r   r   (test_xml_declaration_followed_by_doctype:   s    	
z<TestHTML5LibBuilder.test_xml_declaration_followed_by_doctypec             C   s:   d}| j |}d|jj ks tdt|jdks6td S )Nz%<p><em>foo</p>
<p>bar<a></a></em></p>zD<body><p><em>foo</em></p><em>
</em><p><em>bar<a></a></em></p></body>   r    )r   bodyr   r   lenfind_all)r   r   r   r   r   r   test_reparented_markupH   s    
z*TestHTML5LibBuilder.test_reparented_markupc             C   s:   d}| j |}d|jj ks tdt|jdks6td S )Nz&<p><em>foo</p>
<p>bar<a></a></em></p>
zE<body><p><em>foo</em></p><em>
</em><p><em>bar<a></a></em></p>
</body>r#   r    )r   r$   r   r   r%   r&   )r   r   r   r   r   r   +test_reparented_markup_ends_with_whitespaceO   s    
z?TestHTML5LibBuilder.test_reparented_markup_ends_with_whitespacec             C   sL   d}| j |}|jdd\}}|jd\}}|j|ks:t|j|ksHtdS )zVerify that we keep the two whitespace nodes in this
        document distinct when reparenting the adjacent <tbody> tags.
        z,<table> <tbody><tbody><ims></tbody> </table> )stringtbodyN)r   r&   next_elementr   )r   r   r   Zspace1Zspace2Ztbody1Ztbody2r   r   r   <test_reparented_markup_containing_identical_whitespace_nodesU   s    
zPTestHTML5LibBuilder.test_reparented_markup_containing_identical_whitespace_nodesc             C   s^   d}| j |}|j}d|jks"t|jdd}|jddd }||jksLt||jksZtd S )NzF<div><a>aftermath<p><noscript>target</noscript>aftermath</a></p></div>target)r*   Z	aftermathr   )r   noscriptr,   r   findr&   Zprevious_element)r   r   r   r0   r.   Zfinal_aftermathr   r   r   *test_reparented_markup_containing_children`   s    
z>TestHTML5LibBuilder.test_reparented_markup_containing_childrenc             C   s$   d}| j |}t|jds tdS )z(Processing instructions become comments.s   <?PITarget PIContent?>z<!--?PITarget PIContent?-->N)r   r   
startswithr   )r   r   r   r   r   r   test_processing_instructionp   s    
z/TestHTML5LibBuilder.test_processing_instructionc             C   s8   d}| j |}|jd\}}||ks(t||k	s4td S )Ns   <a class="my_class"><p></a>a)r   r&   r   )r   r   r   Za1Za2r   r   r   test_cloned_multivalue_nodev   s
    
z/TestHTML5LibBuilder.test_cloned_multivalue_nodec             C   s$   d}| j |}d|jj ks td S )Ns   <table><td></tbody>Az><body>A<table><tbody><tr><td></td></tr></tbody></table></body>)r   r$   r   r   )r   r   r   r   r   r   test_foster_parenting}   s    
z)TestHTML5LibBuilder.test_foster_parentingc             C   sL   d}| j |}dd |dD  dd |dD  t|jddksHtd	S )
z
        Test that extraction does not destroy the tree.

        https://bugs.launchpad.net/beautifulsoup/+bug/1782928
        zW
<html><head></head>
<style>
</style><script></script><body><p>hello</p></body></html>
c             S   s   g | ]}|j  qS r   )extract).0sr   r   r   
<listcomp>   s    z7TestHTML5LibBuilder.test_extraction.<locals>.<listcomp>scriptc             S   s   g | ]}|j  qS r   )r8   )r9   r:   r   r   r   r;      s    styler    r   N)r   r%   r&   r   )r   r   r   r   r   r   test_extraction   s
    
z#TestHTML5LibBuilder.test_extractionc             C   sJ   d}| j |}g }x"|jdD ]}|j|jd qW t|dksFtdS )z
        Test that empty comment does not break structure.

        https://bugs.launchpad.net/beautifulsoup/+bug/1806598
        zI
<html>
<body>
<form>
<!----><input type="text">
</form>
</body>
</html>
forminputr   N)r   r&   extendr%   r   )r   r   r   inputsr?   r   r   r   test_empty_comment   s    
z&TestHTML5LibBuilder.test_empty_commentc             C   sz   d}| j |}d|jjkstd|jjks.td|jjdjksDt| j |dd}d|jjjksdtd|jjjksvtd S )Nz=
   <p>

<sourceline>
<b>text</b></sourceline><sourcepos></p>r#      
sourcelineF)Zstore_line_numbers	sourcepos)r   r    rE   r   rF   r1   name)r   r   r   r   r   r   test_tracking_line_numbers   s    
z.TestHTML5LibBuilder.test_tracking_line_numbersc             C   s   d S )Nr   )r   r   r   r   test_special_string_containers   s    z2TestHTML5LibBuilder.test_special_string_containersc       	      C   sn   xhd>D ]`\}}}d*| }| j |j}|j }d+|jd, }||ksFt|jd-d.}d+| }||kstqW d S )?N&RightArrowLeftArrow;   ⇄   &rlarr;&models;   ⊧   &models;&Nfr;   𝔑   &Nfr;&ngeqq;   ≧̸   &ngeqq;&not;   ¬   &not;&Not;   ⫬   &Not;&quot;"   "&there4;   ∴   &there4;&Therefore;&therefore;&fjlig;fj   fj&sqcup;   ⊔   &sqcup;&sqcups;   ⊔︀   &sqcups;&apos;'   '&verbar;|   |z<div>%s</div>s   <div>%s</div>utf8html)	formatterrJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   r`   ra   rc   r`   ra   rd   re   rf   rg   rh   ri   rj   rk   rl   rm   rn   ro   rp   rq   rr   )rv   rw   rx   ry   rz   r{   r|   r}   r~   r   r   r   r   r   r   )r   divr!   r   )	r   Zinput_elementZoutput_unicodeZoutput_elementr   r   Zwithout_elementexpectZwith_elementr   r   r   test_html5_attributes   s0    	              z)TestHTML5LibBuilder.test_html5_attributesN)__name__
__module____qualname____doc__propertyr   r   r   r"   r'   r(   r-   r2   r4   r6   r7   r>   rC   rH   rI   r   r   r   r   r   r	      s"   r	   )r   Zpytestr   Zbs4r   Zbs4.elementr    r   r   r   markZskipifr	   r   r   r   r   <module>   s   