3
d[<                 @   sl   d dl Z d dlZd dlZd dlmZ d dlmZmZmZ G dd deZ	G dd deZ
G dd	 d	eZdS )
    N)BeautifulSoup)EntitySubstitutionEncodingDetectorUnicodeDammitc               @   sf   e Zd ZdZdd Zejjddddd gdd Zdd Z	dd Z
dd Zdd Zdd Zdd ZdS )!TestUnicodeDammitz"Standalone tests of UnicodeDammit.c             C   s   d}t |}|j|kstd S )Nu   I'm already Unicode! ☃)r   unicode_markupAssertionError)selfmarkupdammit r   ?/tmp/pip-build-8z3xcdsh/beautifulsoup4/bs4/tests/test_dammit.pytest_unicode_input   s    z$TestUnicodeDammit.test_unicode_inputz smart_quotes_to,expect_convertedN   ‘’“”xml &#x2018;&#x2019;&#x201C;&#x201D;html&lsquo;&rsquo;&ldquo;&rdquo;asciiz''z""c             C   s,   d}t |dg|dj}|dj|ks(tdS )zbVerify the functionality of the smart_quotes_to argument
        to the UnicodeDammit constructor.s   <foo></foo>zwindows-1252)known_definite_encodingssmart_quotes_toz<foo>{}</foo>N)r   r   formatr   )r	   r   Zexpect_convertedr
   Z	convertedr   r   r   test_smart_quotes_to   s
    
z&TestUnicodeDammit.test_smart_quotes_toc             C   s0   d}t |}|jj dkst|jdks,td S )Ns   Sacré bleu! ☃zutf-8u   Sacré bleu! ☃)r   original_encodinglowerr   r   )r	   utf8r   r   r   r   test_detect_utf8&   s    z"TestUnicodeDammit.test_detect_utf8c             C   s4   d}t |dg}|jj dks"t|jdks0td S )Ns   z
iso-8859-8u   םולש)r   r   r   r   r   )r	   hebrewr   r   r   r   test_convert_hebrew,   s    z%TestUnicodeDammit.test_convert_hebrewc             C   s6   d}t |}|jj dkst|jjd|ks2td S )Ns   ケータイ Watchzutf-8)r   r   r   r   r   encode)r	   utf_8r   r   r   r   /test_dont_see_smart_quotes_where_there_are_none2   s    zATestUnicodeDammit.test_dont_see_smart_quotes_where_there_are_nonec             C   s,   dj d}t|dg}|jj dks(td S )Nu   Räksmörgåszutf-8z
iso-8859-8)r   r   r   r   r   )r	   	utf8_datar   r   r   r    test_ignore_inappropriate_codecs8   s    
z2TestUnicodeDammit.test_ignore_inappropriate_codecsc             C   s:   dj d}x*dD ]"}t||g}|jj dkstqW d S )Nu   Räksmörgåszutf-8.utf8...
utF---16.!)r$   r%   r&   )r   r   r   r   r   )r	   r"   Zbad_encodingr   r   r   r   test_ignore_invalid_codecs=   s    

z,TestUnicodeDammit.test_ignore_invalid_codecsc             C   sL   dj d}t|dgd}|jj dks*tt|ddgd}|jd ksHtd S )Nu   Räksmörgåszutf-8)Zexclude_encodingszwindows-1252)r   r   r   r   r   )r	   r"   r   r   r   r   test_exclude_encodingsC   s    
z(TestUnicodeDammit.test_exclude_encodings)Nr   )r   r   )r   r   ''"")r   r)   )__name__
__module____qualname____doc__r   pytestmarkparametrizer   r   r   r!   r#   r'   r(   r   r   r   r   r      s   r   c               @   sT   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd ZdS )TestEncodingDetectorc             C   s"   t d}t|j}d|kstd S )Ns'   <?xml version="1.0" encoding="UTF-" ?>u   utf-�)r   list	encodingsr   )r	   Zdetectedr3   r   r   r   Ptest_encoding_detector_replaces_junk_in_encoding_name_with_replacement_characterS   s    
zeTestEncodingDetector.test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_characterc             C   s,   x&dD ]}t |dd}d|jkstqW d S )	N&   <html><meta charset="euc-jp" /></html>&   <html><meta charset='euc-jp' /></html>$   <html><meta charset=euc-jp /></html>#   <html><meta charset=euc-jp/></html>T)is_htmlzeuc-jp)r5   r6   r7   r8   )r   r   r   )r	   datar   r   r   r    test_detect_html5_style_meta_tagY   s       z5TestEncodingDetector.test_detect_html5_style_meta_tagc             C   s   d}t jj}tjtj zLdd }|t j_t|}d|jks@td|j	ksNtt
|d}|jsbtW d tjtj |t j_X d S )NsT   ﻿<?xml version="1.0" encoding="UTF-8"?>
<html><b>بتر</b>
<i>ѐ</i></html>c             S   s   d S )Nr   )strr   r   r   noopy   s    zETestEncodingDetector.test_last_ditch_entity_replacement.<locals>.noopTu   �zhtml.parser)bs4r   Zchardet_dammitloggingdisableWARNINGr   Zcontains_replacement_charactersr   r   r   NOTSET)r	   docchardetr=   r   Zsoupr   r   r   "test_last_ditch_entity_replacementc   s    
z7TestEncodingDetector.test_last_ditch_entity_replacementc             C   s,   d}t |}d|jkstd|jks(td S )Ns   < a >   < / a > u   <a>áé</a>zutf-16le)r   r   r   r   )r	   r:   r   r   r   r   test_byte_order_mark_removed   s    z1TestEncodingDetector.test_byte_order_mark_removedc             C   s   d}t |}t |dgd}d|jks(tt |dgd}d|jksDtdgdd |jD ks^td	}t |dgd
gd}d
|jkstdd
gdd |jD kstd S )Ns   < a >   < / a > zutf-16)r   zutf-8)user_encodingszutf-16lec             S   s   g | ]}|d  qS )r   r   ).0xr   r   r   
<listcomp>   s    zRTestEncodingDetector.test_known_definite_versus_user_encodings.<locals>.<listcomp>s   z
iso-8859-8)r   rG   c             S   s   g | ]}|d  qS )r   r   )rH   rI   r   r   r   rJ      s    )r   r   r   tried_encodings)r	   r:   r   beforeafterr   r   r   r   )test_known_definite_versus_user_encodings   s    
z>TestEncodingDetector.test_known_definite_versus_user_encodingsc             C   sJ   d}t |dgdgdgd}d|jks(tdddgdd |jD ksFtd S )Ns   z	shift-jiszutf-8z
iso-8859-8)r   Zoverride_encodingsrG   c             S   s   g | ]}|d  qS )r   r   )rH   rI   r   r   r   rJ      s    zKTestEncodingDetector.test_deprecated_override_encodings.<locals>.<listcomp>)r   r   r   rK   )r	   r   r   r   r   r   "test_deprecated_override_encodings   s    
z7TestEncodingDetector.test_deprecated_override_encodingsc          
   C   s`   dj d}dj d}|| | }tjt |jd W d Q R X tj|}d|jdks\td S )Nu   ☃   r   u   “Hi, I like Windows!”windows_1252u+   ☃☃☃“Hi, I like Windows!”☃☃☃u	   ☃☃☃)r   r.   ZraisesUnicodeDecodeErrordecoder   	detwingler   )r	   r   rQ   rC   Zfixedr   r   r   test_detwingle   s    

z#TestEncodingDetector.test_detwinglec             C   s@   x:dD ]2}|j d}|jds"ttj|}||kstqW d S )N   œ   ₓ   ðr      )rV   rW   rX   )r   endswithr   r   rT   )r	   Ztricky_unicode_charinputoutputr   r   r   +test_detwingle_ignores_multibyte_characters   s      

z@TestEncodingDetector.test_detwingle_ignores_multibyte_charactersc             C   s  d}|j d}d}|j d}tj}||ddd ks6td||ddksJtd||ddks^td||ksntd||ks~td	d
 }||| d kst||| d kst||| ddddkst||dddkst|d	| dddkst|d| ddd kstd S )Nz0<html><head><meta charset="utf-8"></head></html>r   z,<?xml version="1.0" encoding="ISO-8859-1" ?>F)r9   zutf-8Tz
iso-8859-1    i  )r9   search_entire_document)r_      a)r   r   Zfind_declared_encodingr   )r	   Zhtml_unicodeZ
html_bytesZxml_unicodeZ	xml_bytesmZspacerr   r   r   test_find_declared_encoding   s$    


z0TestEncodingDetector.test_find_declared_encodingN)r*   r+   r,   r4   r;   rE   rF   rN   rO   rU   r]   rb   r   r   r   r   r1   Q   s   
#r1   c               @   s   e Zd ZdZdd Zejjdd$d%gd	d
 Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#S )&TestEntitySubstitutionz1Standalone tests of the EntitySubstitution class.c             C   s
   t | _d S )N)r   sub)r	   r   r   r   setup_method  s    z#TestEntitySubstitution.setup_methodzoriginal,substituted   foo∀☃õbar   foo&forall;☃&otilde;bar   ‘’foo“”&lsquo;&rsquo;foo&ldquo;&rdquo;c             C   s   | j j||kstd S )N)rd   substitute_htmlr   )r	   originalZsubstitutedr   r   r   test_substitute_html  s    z+TestEntitySubstitution.test_substitute_htmlc             C   s>   x8dD ]0\}}d}|| }|| }| j j||kstqW d S )N&models;   ⊧&Nfr;   𝔑&ngeqq;   ≧̸&not;   ¬&Not;   ⫬||fj&gt;>&lt;<&amp;&z3 %s 4rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rx   rx   ry   rz   r{   r|   r}   r~   )
r   r   r   r   r   rw   r   r   r   r   )rd   rj   r   )r	   entityutemplaterawZwith_entitiesr   r   r   test_html5_entity%  s             z(TestEntitySubstitution.test_html5_entityc             C   s<   d}d}| j j||kstd}d}| j j||ks8td S )Nu   fjords ⊔ penguinszfjords &sqcup; penguinsu   fjords ⊔︀ penguinszfjords &sqcups; penguins)rd   rj   r   )r	   r:   r
   r   r   r   )test_html5_entity_with_variation_selectorC  s    z@TestEntitySubstitution.test_html5_entity_with_variation_selectorc             C   s   d}| j j|d|kstd S )NzWelcome to "my bar"F)rd   substitute_xmlr   )r	   sr   r   r   Itest_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_falseO  s    z`TestEntitySubstitution.test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_falsec             C   s0   | j jdddkst| j jdddks,td S )NZWelcomeTz	"Welcome"z	Bob's Barz"Bob's Bar")rd   r   r   )r	   r   r   r   6test_xml_attribute_quoting_normally_uses_double_quotesS  s    zMTestEntitySubstitution.test_xml_attribute_quoting_normally_uses_double_quotesc             C   s   d}| j j|ddkstd S )NzWelcome to "my bar"Tz'Welcome to "my bar"')rd   r   r   )r	   r   r   r   r   Otest_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotesW  s    zfTestEntitySubstitution.test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotesc             C   s   d}| j j|ddkstd S )NzWelcome to "Bob's Bar"Tz""Welcome to &quot;Bob's Bar&quot;")rd   r   r   )r	   r   r   r   r   btest_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes[  s    zyTestEntitySubstitution.test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotesc             C   s   d}| j j||kstd S )NzWelcome to "Bob's Bar")rd   r   r   )r	   quotedr   r   r   <test_xml_quotes_arent_escaped_when_value_is_not_being_quoted_  s    zSTestEntitySubstitution.test_xml_quotes_arent_escaped_when_value_is_not_being_quotedc             C   s   | j jddkstd S )Nzfoo<bar>zfoo&lt;bar&gt;)rd   r   r   )r	   r   r   r   'test_xml_quoting_handles_angle_bracketsc  s    z>TestEntitySubstitution.test_xml_quoting_handles_angle_bracketsc             C   s   | j jddkstd S )NzAT&TzAT&amp;T)rd   r   r   )r	   r   r   r   #test_xml_quoting_handles_ampersandsf  s    z:TestEntitySubstitution.test_xml_quoting_handles_ampersandsc             C   s   | j jddkstd S )Nz&Aacute;T&Tz&amp;Aacute;T&amp;T)rd   r   r   )r	   r   r   r   Etest_xml_quoting_including_ampersands_when_they_are_part_of_an_entityi  s    z\TestEntitySubstitution.test_xml_quoting_including_ampersands_when_they_are_part_of_an_entityc             C   s   | j jddkstd S )Nz&Aacute;T&Tz&Aacute;T&amp;T)rd   Z"substitute_xml_containing_entitiesr   )r	   r   r   r   Dtest_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entityl  s    z[TestEntitySubstitution.test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entityc             C   s   d}| j j||kstdS )z:There's no need to do this except inside attribute values.zBob's "bar"N)rd   rj   r   )r	   textr   r   r    test_quotes_not_html_substitutedo  s    z7TestEntitySubstitution.test_quotes_not_html_substitutedN)rf   rg   )rh   ri   )r*   r+   r,   r-   re   r.   r/   r0   rl   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rc     s&    rc   )r.   r?   r>   r   Z
bs4.dammitr   r   r   objectr   r1   rc   r   r   r   r   <module>   s   E ?