3
Ud6                 @   s  d dl mZ d dlmZ d dlmZmZ d dlZd dlZd dl	Z	d dl
mZ d dlZd dlmZ d dlZd dlmZ d dlmZ d dljjZd d	lmZmZmZmZmZmZ d dl j!Z"d d
l#m$Z$ d dl%Zd dl%m&Z& ej'j(e)Z*ej+ddddgddd Z,dd Z-ej.ddd Z/dd Z0ej.dej.ddd Z1ej2j3dej4dej.ddej4dej.ddgdd G d!d" d"Z5dS )#    )partial)reload)BytesIOStringION)URLError)rand)is_platform_windows)ParserError)	DataFrame
MultiIndexSeries	Timestamp
date_rangeread_csv)file_path_to_url)	read_htmlzchinese_utf-16.htmlzchinese_utf-32.htmlzchinese_utf-8.htmlzletz_latin1.html)paramsc             C   s   |ddd| j S )z6Parametrized fixture for HTML encoding test filenames.iodataZhtml_encoding)param)requestdatapath r   M/var/www/html/virt/lib64/python3.6/site-packages/pandas/tests/io/test_html.pyhtml_encoding_file   s    
r   c             O   s   t | t |ks,tdt |  dt | d}ttdd | |}|sPt|x:t| |D ],\}}tj||f|| |j s\tdq\W d S )Nz*lists are not of equal size len(list1) == z, len(list2) == z$not all list elements are DataFramesc             S   s   t | tot |tS )N)
isinstancer
   )xyr   r   r   <lambda>1   s    z(assert_framelist_equal.<locals>.<lambda>zframes are both empty)lenAssertionErrorallmapziptmassert_frame_equalempty)Zlist1Zlist2argskwargsmsgZboth_framesZframe_iZframe_jr   r   r   assert_framelist_equal(   s    r*   bs4c             C   sJ   dd l }| j|dd tjtdd t|dddd	d
d W d Q R X d S )Nr   __version__z4.2zPandas requires version)matchr   r   htmlz	spam.htmlr+   )flavor)r+   setattrpytestraisesImportErrorr   )Zmonkeypatchr   r+   r   r   r   test_bs4_version_fails<   s    r4   c              C   s@   d} d}d| d }t jt|d t| d|d W d Q R X d S )Nz
google.comzinvalid flavorz\{z \} is not a valid set of flavors)r-   Zgoogle)r-   r/   )r1   r2   
ValueErrorr   )urlr/   r)   r   r   r   test_invalid_flavorE   s
    r7   lxmlc             C   s<   | dddd}t |ddgd}t |ddgd}t|| d S )	Nr   r   r.   zvalid_markup.htmlr   r8   )	index_colr/   r+   )r   r*   )r   filenameZdfs_lxmlZdfs_bs4r   r   r   test_same_orderingN   s    r;   r/   )Zmarksclass)scopec               @   sh  e Zd Zejdddd Zejddddd Zd	d
 Zej	dd Z
ej	dd Zej	dd Zejjdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Z d5d6 Z!d7d8 Z"d9d: Z#ej	d;d< Z$ej	ejjd=d> Z%ejjd?d@ Z&ejjdAdB Z'dCdD Z(ejjdEdF Z)ejjdGdH Z*ejjdIdJ Z+ejjdKdL Z,ejjdMdN Z-ejjdOdP Z.ejjdQdR Z/dSdT Z0ej	dUdV Z1ej	dWdX Z2dYdZ Z3d[d\ Z4d]d^ Z5d_d` Z6dadb Z7dcdd Z8ejjdedf Z9ejjdgdh Z:didj Z;dkdl Z<dmdn Z=dodp Z>dqdr Z?dsdt Z@dudv ZAdwdx ZBdydz ZCd{d| ZDd}d~ ZEdd ZFdd ZGdd ZHdd ZIdd ZJdd ZKdd ZLdd ZMdd ZNdd ZOejjdd ZPdd ZQejjRddeSdgdfdeSdgeSdgfgdd ZTdd ZUdd ZVdd ZWejjdd ZXdS )TestReadHtmlT)autousec             C   s4   |dddd| _ i | _d| jd< |dddd| _d S )Nr   r   r.   z	spam.htmlzUTF-8encodingzbanklist.html)	spam_dataspam_data_kwargsbanklist_data)selfr   r   r   r   	set_files`   s    
zTestReadHtml.set_filesfunction)r?   r=   c             c   s   t t|d| _d V  d S )N)r/   )r   r   )rD   r/   r   r   r   r   set_defaultsg   s    zTestReadHtml.set_defaultsc             C   sV   t jdddd dddjdjjt}|j }| j|dd	id
dd
 }t j|| d S )N      c              W   s   t  S )N)r   )r'   r   r   r   r   q   s    z2TestReadHtml.test_to_html_compat.<locals>.<lambda>F)Z
data_gen_fZc_idx_namesZr_idx_namesz{0:.3f}r<   Z	dataframer   )attrsr9   )	r$   ZmakeCustomDataframeapplymapformatZastypefloatto_htmlr   r%   )rD   dfoutresr   r   r   test_to_html_compatl   s    z TestReadHtml.test_to_html_compatc             C   sf   d}t jt | j|dddid}W d Q R X t jt | j|dddid}W d Q R X t|| d S )Nz8http://www.fdic.gov/bank/individual/failed/banklist.htmlzFirst Federal Bank of Floridaidtable)rJ   zMetcalf Bank)r$   Zassert_produces_warningFutureWarningr   r*   )rD   r6   df1df2r   r   r   "test_banklist_url_positional_match|   s    z/TestReadHtml.test_banklist_url_positional_matchc             C   s:   d}| j |dddid}| j |dddid}t|| d S )Nz8http://www.fdic.gov/bank/individual/failed/banklist.htmlzFirst Federal Bank of FloridarS   rT   )r-   rJ   zMetcalf Bank)r   r*   )rD   r6   rV   rW   r   r   r   test_banklist_url   s
    zTestReadHtml.test_banklist_urlc             C   s.   d}| j |dd}| j |dd}t|| d S )Nz^https://raw.githubusercontent.com/pandas-dev/pandas/master/pandas/tests/io/data/html/spam.htmlz	.*Water.*)r-   Unit)r   r*   )rD   r6   rV   rW   r   r   r   test_spam_url   s    zTestReadHtml.test_spam_urlc             C   s:   | j | jdddid}| j | jdddid}t|| d S )Nz.*Florida.*rS   rT   )r-   rJ   zMetcalf Bank)r   rC   r*   )rD   rV   rW   r   r   r   test_banklist   s
    zTestReadHtml.test_banklistc             C   sZ   | j | jdd}| j | jdd}t|| |d jd dks@t|d jd dksVtd S )Nz	.*Water.*)r-   rZ   r   
ProximatesZNutrient)r   r   )r   rA   r*   ilocr    columns)rD   rV   rW   r   r   r   	test_spam   s
    
zTestReadHtml.test_spamc             C   s,   | j | j}x|D ]}t|tstqW d S )N)r   rA   r   r
   r    )rD   dfsrO   r   r   r   test_spam_no_match   s    
zTestReadHtml.test_spam_no_matchc             C   s4   | j | jddid}x|D ]}t|tstqW d S )NrS   rT   )rJ   )r   rC   r   r
   r    )rD   ra   rO   r   r   r   test_banklist_no_match   s    
z#TestReadHtml.test_banklist_no_matchc             C   s8   | j | jdddd }|jd dks(t|j s4td S )Nz	.*Water.*   )r-   headerr   r]   )r   rA   r_   r    r&   )rD   rO   r   r   r   test_spam_header   s    zTestReadHtml.test_spam_headerc             C   s2   | j | jddd}| j | jddd}t|| d S )Nz	.*Water.*   )r-   skiprowsrZ   )r   rA   r*   )rD   rV   rW   r   r   r   test_skiprows_int   s    zTestReadHtml.test_skiprows_intc             C   s:   | j | jdtdd}| j | jdtdd}t|| d S )Nz	.*Water.*rd   )r-   rh   rZ   )r   rA   ranger*   )rD   rV   rW   r   r   r   test_skiprows_range   s    z TestReadHtml.test_skiprows_rangec             C   s:   | j | jdddgd}| j | jdddgd}t|| d S )Nz	.*Water.*rg   rd   )r-   rh   rZ   )r   rA   r*   )rD   rV   rW   r   r   r   test_skiprows_list   s    zTestReadHtml.test_skiprows_listc             C   s:   | j | jdddhd}| j | jdddhd}t|| d S )Nz	.*Water.*rg   rd   )r-   rh   rZ   )r   rA   r*   )rD   rV   rW   r   r   r   test_skiprows_set   s    zTestReadHtml.test_skiprows_setc             C   s2   | j | jddd}| j | jddd}t|| d S )Nz	.*Water.*rg   )r-   rh   rZ   )r   rA   r*   )rD   rV   rW   r   r   r   test_skiprows_slice   s    z TestReadHtml.test_skiprows_slicec             C   s:   | j | jdtdd}| j | jdtdd}t|| d S )Nz	.*Water.*rd   )r-   rh   rZ   )r   rA   slicer*   )rD   rV   rW   r   r   r   test_skiprows_slice_short   s    z&TestReadHtml.test_skiprows_slice_shortc             C   s@   | j | jdtddd}| j | jdtdddd}t|| d S )	Nz	.*Water.*rd      )r-   rh   rZ   rH   rg   )r   rA   ro   r*   )rD   rV   rW   r   r   r   test_skiprows_slice_long   s    z%TestReadHtml.test_skiprows_slice_longc             C   s>   | j | jdtjdd}| j | jdtjdd}t|| d S )Nz	.*Water.*rd   )r-   rh   rZ   )r   rA   npZaranger*   )rD   rV   rW   r   r   r   test_skiprows_ndarray   s    z"TestReadHtml.test_skiprows_ndarrayc             C   s0   t jtdd | j| jddd W d Q R X d S )Nz%is not a valid type for skipping rows)r-   z	.*Water.*Zasdf)r-   rh   )r1   r2   	TypeErrorr   rA   )rD   r   r   r   test_skiprows_invalid   s    z"TestReadHtml.test_skiprows_invalidc             C   s2   | j | jddd}| j | jddd}t|| d S )Nz	.*Water.*r   )r-   r9   rZ   )r   rA   r*   )rD   rV   rW   r   r   r   
test_index   s    zTestReadHtml.test_indexc             C   s6   | j | jdddd}| j | jdddd}t|| d S )Nz	.*Water.*rg   r   )r-   re   r9   rZ   )r   rA   r*   )rD   rV   rW   r   r   r   test_header_and_index_no_types   s    z+TestReadHtml.test_header_and_index_no_typesc             C   s6   | j | jdddd}| j | jdddd}t|| d S )Nz	.*Water.*rg   r   )r-   re   r9   rZ   )r   rA   r*   )rD   rV   rW   r   r   r    test_header_and_index_with_types   s    z-TestReadHtml.test_header_and_index_with_typesc             C   s2   | j | jddd}| j | jddd}t|| d S )Nz	.*Water.*r   )r-   r9   rZ   )r   rA   r*   )rD   rV   rW   r   r   r   test_infer_types  s    zTestReadHtml.test_infer_typesc             C   sz   t | jf| j}t|j }W d Q R X t | jf| j}t|j }W d Q R X | j|dd}| j|dd}t|| d S )Nz	.*Water.*)r-   rZ   )openrA   rB   r   readr   r*   )rD   fdata1data2rV   rW   r   r   r   test_string_io
  s    zTestReadHtml.test_string_ioc             C   sN   t | jf| j}|j }W d Q R X | j|dd}| j|dd}t|| d S )Nz	.*Water.*)r-   rZ   )r|   rA   rB   r}   r   r*   )rD   r~   r   rV   rW   r   r   r   test_string  s
    zTestReadHtml.test_stringc             C   sb   t | jf| j}| j|dd}W d Q R X t | jf| j}| j|dd}W d Q R X t|| d S )Nz	.*Water.*)r-   rZ   )r|   rA   rB   r   r*   )rD   r~   rV   rW   r   r   r   test_file_like  s
    zTestReadHtml.test_file_likec             C   s(   t jt | jddd W d Q R X d S )Nzgit://github.comz	.*Water.*)r-   )r1   r2   r   r   )rD   r   r   r   test_bad_url_protocol'  s    z"TestReadHtml.test_bad_url_protocolc             C   s`   y(t jt | jddd W d Q R X W n2 tk
rZ } zdt|ksJtW Y d d }~X nX d S )Nzhttp://www.a23950sdfa908sd.comz	.*Water.*)r-   zNo tables found)r1   r2   r   r   r5   strr    )rD   er   r   r   test_invalid_url,  s
    zTestReadHtml.test_invalid_urlc             C   sT   | j }| jttjj|dddid}t|ts4tx|D ]}t|t	s:tq:W d S )NZFirstrS   rT   )r-   rJ   )
rC   r   r   ospathabspathr   listr    r
   )rD   r6   ra   rO   r   r   r   test_file_url5  s    
zTestReadHtml.test_file_urlc             C   s8   | j }tjtdd | j|dddid W d Q R X d S )NzNo tables found)r-   zFirst Federal Bank of FloridarS   Z	tasdfable)r-   rJ   )rC   r1   r2   r5   r   )rD   r6   r   r   r   test_invalid_table_attrs?  s    z%TestReadHtml.test_invalid_table_attrsc             O   s"   | j | jf|dddid|S )NMetcalfrS   rT   )r-   rJ   )r   rC   )rD   r'   r(   r   r   r   
_bank_dataG  s    zTestReadHtml._bank_datac             C   s(   | j ddgdd }t|jts$td S )Nr   rg   )re   )r   r   r_   r   r    )rD   rO   r   r   r   test_multiindex_headerL  s    z#TestReadHtml.test_multiindex_headerc             C   s(   | j ddgdd }t|jts$td S )Nr   rg   )r9   )r   r   indexr   r    )rD   rO   r   r   r   test_multiindex_indexQ  s    z"TestReadHtml.test_multiindex_indexc             C   s>   | j ddgddgdd }t|jts*tt|jts:td S )Nr   rg   )re   r9   )r   r   r_   r   r    r   )rD   rO   r   r   r   test_multiindex_header_indexV  s    z)TestReadHtml.test_multiindex_header_indexc             C   s*   | j ddgddd }t|jts&td S )Nr   rg   )re   rh   )r   r   r_   r   r    )rD   rO   r   r   r   &test_multiindex_header_skiprows_tuples\  s    z3TestReadHtml.test_multiindex_header_skiprows_tuplesc             C   s*   | j ddgddd }t|jts&td S )Nr   rg   )re   rh   )r   r   r_   r   r    )rD   rO   r   r   r   test_multiindex_header_skiprowsa  s    z,TestReadHtml.test_multiindex_header_skiprowsc             C   s@   | j ddgddgddd }t|jts,tt|jts<td S )Nr   rg   )re   r9   rh   )r   r   r   r   r    r_   )rD   rO   r   r   r   %test_multiindex_header_index_skiprowsf  s    z2TestReadHtml.test_multiindex_header_index_skiprowsc             C   s`   | j }| jttjj|tjtjdddid}t|t	s@t
x|D ]}t|tsFt
qFW d S )NZFloridarS   rT   )r-   rJ   )rC   r   r   r   r   r   recompiler   r   r    r
   )rD   r6   ra   rO   r   r   r   test_regex_idempotencyl  s    
z#TestReadHtml.test_regex_idempotencyc             C   s4   d}t jt|d | j| jddd W d Q R X d S )Nz\(you passed a negative value\))r-   ZWaterrg   )r-   rh   rr   )r1   r2   r5   r   rA   )rD   r)   r   r   r   test_negative_skiprowsx  s    z#TestReadHtml.test_negative_skiprowsc             C   s&   d}| j |dd}t|dks"td S )Nzhttps://docs.python.org/2/Python)r-   rg   )r   r   r    )rD   r6   ra   r   r   r   test_multiple_matches}  s    z"TestReadHtml.test_multiple_matchesc             C   s<   d}| j |dd}dd |D }t|tddgks8td S )Nzhttps://docs.python.org/2/r   )r-   c             S   s   g | ]}|j d d d qS )r   rH   )r   r   )r^   ).0rO   r   r   r   
<listcomp>  s    z7TestReadHtml.test_python_docs_table.<locals>.<listcomp>ZRepoZWhat)r   sortedr    )rD   r6   ra   zzr   r   r   test_python_docs_table  s    z#TestReadHtml.test_python_docs_tablec             C   s"   d}| j |}t|dkstdS )z@
        Make sure that read_html ignores empty tables.
        a  
            <table>
                <thead>
                    <tr>
                        <th>A</th>
                        <th>B</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>1</td>
                        <td>2</td>
                    </tr>
                </tbody>
            </table>
            <table>
                <tbody>
                </tbody>
            </table>
        rg   N)r   r   r    )rD   r.   resultr   r   r   test_empty_tables  s    
zTestReadHtml.test_empty_tablesc             C   s:   | j dd }tddgddggddgd	}tj|| d S )
Na  <table>
            <thead>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>1</td>
                    <td>2</td>
                </tr>
            </tbody>
            <tbody>
                <tr>
                    <td>3</td>
                    <td>4</td>
                </tr>
            </tbody>
        </table>r   rg   rd   rI   rH   AB)r   r_   )r   r
   r$   r%   )rD   r   expectedr   r   r   test_multiple_tbody  s
    z TestReadHtml.test_multiple_tbodyc             C   s0   | j dd }tddidgd}tj|| dS )zt
        Don't fail with bs4 when there is a header and only one column
        as described in issue #9178
        a3  <table>
                <thead>
                    <tr>
                        <th>Header</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>first</td>
                    </tr>
                </tbody>
            </table>r   Headerfirst)r   r   N)r   r
   r$   r%   )rD   r   r   r   r   r   test_header_and_one_column  s
    z'TestReadHtml.test_header_and_one_columnc             C   s8   | j dd }tdddggdddgd	}tj|| d
S )zK
        Ensure parser adds <tr> within <thead> on malformed HTML.
        a  <table>
            <thead>
                <tr>
                    <th>Country</th>
                    <th>Municipality</th>
                    <th>Year</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>Ukraine</td>
                    <th>Odessa</th>
                    <td>1944</td>
                </tr>
            </tbody>
        </table>r   ZUkraineZOdessai  ZCountryZMunicipalityZYear)r   r_   N)r   r
   r$   r%   )rD   r   r   r   r   r   test_thead_without_tr  s    
z"TestReadHtml.test_thead_without_trc             C   s   d}t ddggddgd}t ddgddggddgd}|jd	d
}|jdd
}| j|d }| j|d }tj|| tj|| dS )zh
        Make sure that read_html reads tfoot, containing td or th.
        Ignores empty tfoot
        a  <table>
            <thead>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>bodyA</td>
                    <td>bodyB</td>
                </tr>
            </tbody>
            <tfoot>
                {footer}
            </tfoot>
        </table>ZbodyAZbodyBr   r   )r   r_   ZfootAZfootB )footerz%<tr><td>footA</td><th>footB</th></tr>r   N)r
   rL   r   r$   r%   )rD   Zdata_templateZ	expected1Z	expected2r   r   Zresult1Zresult2r   r   r   test_tfoot_read  s    zTestReadHtml.test_tfoot_readc             C   s4   | j dddd }tddggd	d}tj|| d S )
Na
  
            <table>
                <tr>
                    <td>S</td>
                    <td>I</td>
                </tr>
                <tr>
                    <td>text</td>
                    <td>1944</td>
                </tr>
            </table>
        r   )re   texti  SI)r_   )r   r   )r   r
   r$   r%   )rD   r   r   r   r   r   &test_parse_header_of_non_string_column  s    z3TestReadHtml.test_parse_header_of_non_string_columnc          
      s   ddl m   fdd}| j| jdddidd }t|d	d
ddttdd}|j|jks^tddddddddddg
}dddddddd d!d"g
}|j|j	||}|j|}|j
d#d#d$}	d%d&g}
|	|
 j
d#d#d'|	|
< tj|	| d S )(Nr   )_remove_whitespacec                s"   y | S  t k
r   | S X d S )N)AttributeError)r   )r   r   r   try_remove_ws:  s    z8TestReadHtml.test_banklist_header.<locals>.try_remove_wsr   rS   rT   )r-   rJ   r   r   csvzbanklist.csv)zUpdated DatezClosing Date)
convertersz+First Vietnamese American BankIn Vietnamesez!Westernbank Puerto RicoEn Espanolz)R-G Premier Bank of Puerto RicoEn EspanolzEurobankEn EspanolzSanderson State BankEn EspanolzKWashington Mutual Bank(Including its subsidiary Washington Mutual Bank FSB)zSilver State BankEn Espanolz$AmTrade International BankEn EspanolzHamilton Bank, NAEn Espanolz5The Citizens Savings BankPioneer Community Bank, Inc.zFirst Vietnamese American BankzWesternbank Puerto RicozR-G Premier Bank of Puerto RicoZEurobankzSanderson State BankzWashington Mutual BankzSilver State BankzAmTrade International BankzHamilton Bank, NAzThe Citizens Savings BankT)datetimenumericzClosing DatezUpdated Date)r   Zcoerce)pandas.io.htmlr   r   rC   r   r   shaper    rK   replace_convertr$   r%   )rD   r   r   rO   Zground_trutholdnewZdfnewZgtnewZ	convertedZ	date_colsr   )r   r   test_banklist_header6  sD    
z!TestReadHtml.test_banklist_headerc             C   s^   d}t | jd}|j }W d Q R X ||ks0t| j| jdddidd }||j ksZtd S )NzGold CanyonrrS   rT   )r-   rJ   r   )r|   rC   r}   r    r   Z	to_string)rD   gcr~   Zraw_textrO   r   r   r   test_gold_canyonh  s    zTestReadHtml.test_gold_canyonc             C   s4   | j dddd }| j dddd }tj|| d S )Na  <table>
                        <thead>
                            <tr style="text-align: right;">
                            <th></th>
                            <th>C_l0_g0</th>
                            <th>C_l0_g1</th>
                            <th>C_l0_g2</th>
                            <th>C_l0_g3</th>
                            <th>C_l0_g4</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                            <th>R_l0_g0</th>
                            <td> 0.763</td>
                            <td> 0.233</td>
                            <td> nan</td>
                            <td> nan</td>
                            <td> nan</td>
                            </tr>
                            <tr>
                            <th>R_l0_g1</th>
                            <td> 0.244</td>
                            <td> 0.285</td>
                            <td> 0.392</td>
                            <td> 0.137</td>
                            <td> 0.222</td>
                            </tr>
                        </tbody>
                    </table>r   )r9   a  <table>
                    <thead>
                        <tr style="text-align: right;">
                        <th></th>
                        <th>C_l0_g0</th>
                        <th>C_l0_g1</th>
                        <th>C_l0_g2</th>
                        <th>C_l0_g3</th>
                        <th>C_l0_g4</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                        <th>R_l0_g0</th>
                        <td> 0.763</td>
                        <td> 0.233</td>
                        </tr>
                        <tr>
                        <th>R_l0_g1</th>
                        <td> 0.244</td>
                        <td> 0.285</td>
                        <td> 0.392</td>
                        <td> 0.137</td>
                        <td> 0.222</td>
                        </tr>
                    </tbody>
                 </table>)r   r$   r%   )rD   r   r   r   r   r   test_different_number_of_colst  s    z*TestReadHtml.test_different_number_of_colsc             C   s8   | j dd }tdddggdddgd	}tj|| d S )
NaZ  
            <table>
                <tr>
                    <th>A</th>
                    <th colspan="1">B</th>
                    <th rowspan="1">C</th>
                </tr>
                <tr>
                    <td>a</td>
                    <td>b</td>
                    <td>c</td>
                </tr>
            </table>
        r   abcr   r   C)r_   )r   r
   r$   r%   )rD   r   r   r   r   r   test_colspan_rowspan_1  s
    z#TestReadHtml.test_colspan_rowspan_1c             C   sD   | j dddd }tdddddggdd	d
ddgd}tj|| d S )Na  
            <table>
                <tr>
                    <td colspan="2">X</td>
                    <td>Y</td>
                    <td rowspan="2">Z</td>
                    <td>W</td>
                </tr>
                <tr>
                    <td>A</td>
                    <td colspan="2">B</td>
                    <td>C</td>
                </tr>
            </table>
        r   )re   r   r   Zr   XzX.1YW)r   r_   )r   r
   r$   r%   )rD   r   r   r   r   r    test_colspan_rowspan_copy_values  s     z-TestReadHtml.test_colspan_rowspan_copy_valuesc             C   sD   | j dddd }tdddddggddddd	gd
}tj|| d S )Na(  
            <table>
                <tr>
                    <td rowspan="2">A</td>
                    <td rowspan="2" colspan="3">B</td>
                    <td>C</td>
                </tr>
                <tr>
                    <td>D</td>
                </tr>
            </table>
        r   )re   r   r   DzB.1zB.2r   )r   r_   )r   r
   r$   r%   )rD   r   r   r   r   r   test_colspan_rowspan_both_not_1  s     z,TestReadHtml.test_colspan_rowspan_both_not_1c             C   s8   | j dddd }tddggddgd}tj|| d S )Nz
            <table>
                <tr>
                    <td>A</td>
                    <td rowspan="2">B</td>
                </tr>
                <tr>
                    <td>C</td>
                </tr>
            </table>
        r   )re   r   r   r   )r   r_   )r   r
   r$   r%   )rD   r   r   r   r   r   test_rowspan_at_end_of_row  s    z'TestReadHtml.test_rowspan_at_end_of_rowc             C   s>   | j dddd }tddgddggddgd}tj|| d S )Nz
            <table>
                <tr>
                    <td rowspan="3">A</td>
                    <td rowspan="3">B</td>
                </tr>
            </table>
        r   )re   r   r   )r   r_   )r   r
   r$   r%   )rD   r   r   r   r   r   test_rowspan_only_rows)  s    z#TestReadHtml.test_rowspan_only_rowsc             C   sT   | j dd }tddgddggddgddggd}tdd	gg|d
}tj|| d S )Nam  
            <table>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
                <tr>
                    <th>a</th>
                    <th>b</th>
                </tr>
                <tr>
                    <td>1</td>
                    <td>2</td>
                </tr>
            </table>
        r   r   r   r   r   rg   )levelscodesrd   )r   r_   )r   r   r
   r$   r%   )rD   r   r_   r   r   r   r   +test_header_inferred_from_rows_with_only_th<  s    $z8TestReadHtml.test_header_inferred_from_rows_with_only_thc             C   sd   t dtdddi}|j }| j|dgdd}tj||d  | j|dgdd}tj||d  d S )Ndatez1/1/2001
   )periodsrg   r   )parse_datesr9   )r
   r   rN   r   r$   r%   )rD   rO   r   rQ   r   r   r   test_parse_dates_listV  s    z"TestReadHtml.test_parse_dates_listc             C   sn   t tddd}t|jdd |jdd d}| j|j dd	d
gid	d}td|i}tj||d  d S )Nz1/1/2001r   )r   c             S   s   t | j S )N)r   r   )r   r   r   r   r   b  s    z7TestReadHtml.test_parse_dates_combine.<locals>.<lambda>c             S   s   t | j S )N)r   time)r   r   r   r   r   c  s    )r   r   r   rg   rd   )r   r9   r   )r   r   r
   r"   r   rN   r$   r%   )rD   Z	raw_datesrO   rQ   Znewdfr   r   r   test_parse_dates_combine^  s    z%TestReadHtml.test_parse_dates_combinec             C   s   |dddd}t jj|s,tt| dt jj|sJtt| d| j|ddd	d
 }|jdksltd|jd ks~t|d j	t
j	dkstt
j|jd dstd S )Nr   r   r.   zwikipedia_states.htmlz is not a filez is an empty fileArizonarg   )r-   re   r   <      Unnamedsq mifloat64gHzPN$A)r   r   rr   )r   r   )r   r   isfiler    reprgetsizer   r   r_   dtypert   allcloseloc)rD   r   r   r   r   r   r   test_wikipedia_states_tablel  s    z(TestReadHtml.test_wikipedia_states_tablec             C   sp   |dddd}| j |dddd }|jdks0td
|jd d ksFt|jjdksVttj|jd dsltd S )Nr   r   r.   zwikipedia_states.htmlr   r   )r-   r9   r      r   rg   rd   AlaskaTotal area[2]sq migHzPN$A)r   r   rr   r   r   )r   r   )r   r   r    r_   Znlevelsrt   r   r   )rD   r   r   r   r   r   r    test_wikipedia_states_multiindexv  s    z-TestReadHtml.test_wikipedia_states_multiindexc             C   s4   d}t jt|d | jdddgd W d Q R X d S )NzGPassed header=\[0,1\] are too many rows for this multi_index of columns)r-   aK  
                <table>
                    <thead>
                        <tr><th></th><th></tr>
                        <tr><th>A</th><th>B</th></tr>
                    </thead>
                    <tbody>
                        <tr><td>a</td><td>b</td></tr>
                    </tbody>
                </table>
            r   rg   )re   )r1   r2   r	   r   )rD   r)   r   r   r   %test_parser_error_on_empty_header_row~  s
    z2TestReadHtml.test_parser_error_on_empty_header_rowc             C   sL   | j dddd }tddidgd}|d jtjdks<ttj|| d S )	Na  <html>
            <body>
             <table>
                <thead>
                    <tr>
                        <th>Header</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>1100#101</td>
                    </tr>
                </tbody>
            </table>
            </body>
        </html>#)decimalr   r   gClg0@)r   r   r   )r   r
   r   rt   r    r$   r%   )rD   r   r   r   r   r   test_decimal_rows  s    zTestReadHtml.test_decimal_rowsc             C   s8   x2dD ]*}t jt | j| j|d W d Q R X qW d S )NTF)re   )TF)r1   r2   rv   r   rA   )rD   argr   r   r   test_bool_header_arg  s    
z!TestReadHtml.test_bool_header_argc             C   s6   | j ddtidd }tdddgi}tj|| d S )Na  <table>
                 <thead>
                   <tr>
                     <th>a</th>
                    </tr>
                 </thead>
                 <tbody>
                   <tr>
                     <td> 0.763</td>
                   </tr>
                   <tr>
                     <td> 0.244</td>
                   </tr>
                 </tbody>
               </table>r   )r   r   z0.763z0.244)r   r   r
   r$   r%   )rD   r   r   r   r   r   test_converters  s    
zTestReadHtml.test_convertersc             C   s6   | j ddgdd }tddtjgi}tj|| d S )Na  <table>
                 <thead>
                   <tr>
                     <th>a</th>
                   </tr>
                 </thead>
                 <tbody>
                   <tr>
                     <td> 0.763</td>
                   </tr>
                   <tr>
                     <td> 0.244</td>
                   </tr>
                 </tbody>
               </table>gZd;?)Z	na_valuesr   r   g"~j?)r   r
   rt   nanr$   r%   )rD   r   r   r   r   r   test_na_values  s    zTestReadHtml.test_na_valuesc             C   sh   d}t dddgi}| j|ddd }tj|| t dtjtjgi}| j|ddd }tj|| d S )	Na  <table>
                        <thead>
                            <tr>
                            <th>a</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                            <td> N/A</td>
                            </tr>
                            <tr>
                            <td> NA</td>
                            </tr>
                        </tbody>
                    </table>r   zN/AZNAF)Zkeep_default_nar   T)r
   r   r$   r%   rt   r   )rD   Z	html_dataexpected_dfhtml_dfr   r   r   test_keep_default_na  s    z!TestReadHtml.test_keep_default_nac             C   s>   | j dd }tddgtjtjggddgd}tj|| d S )Nak  
            <table>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
                <tr>
                    <td>a</td>
                    <td>b</td>
                </tr>
                <tr>
                    <td></td>
                    <td></td>
                </tr>
            </table>
        r   r   r   r   r   )r   r_   )r   r
   rt   r   r$   r%   )rD   r   r   r   r   r   test_preserve_empty_rows  s
     z%TestReadHtml.test_preserve_empty_rowsc             C   sT   | j dd }tddgddggddgddggd}tdd	gg|d
}tj|| d S )NaU  
            <table>
                <thead>
                    <tr><th></th><th></tr>
                    <tr><th>A</th><th>B</th></tr>
                    <tr><th>a</th><th>b</th></tr>
                </thead>
                <tbody>
                    <tr><td>1</td><td>2</td></tr>
                </tbody>
            </table>
        r   r   r   r   r   rg   )r   r   rd   )r   r_   )r   r   r
   r$   r%   )rD   r   r_   r   r   r   r   ,test_ignore_empty_rows_when_inferring_header  s    $z9TestReadHtml.test_ignore_empty_rows_when_inferring_headerc             C   sP   t dddgd	}d
ddgdddgg|_|jdd}| j|d }tj|| d S )NHillaryD   r   BernieJ   DonaldE   R)r   zUnnamed: 0_level_0ZAgeZPartyNamezUnnamed: 1_level_1zUnnamed: 2_level_1F)r   r   )r   r   r   )r   r  r   )r  r  r  )r
   r_   rN   r   r$   r%   )rD   r   r.   r   r   r   r   test_multiple_header_rows,  s    z&TestReadHtml.test_multiple_header_rowsc             C   s@   |dddd}| j |dd}t|ts*tt|d ts<td S )Nr   r   r.   zvalid_markup.htmlr   )r9   )r   r   r   r    r
   )rD   r   r:   ra   r   r   r   test_works_on_valid_markup9  s    z'TestReadHtml.test_works_on_valid_markupc             C   s&   |dddd}| j |dddgd d S )	Nr   r   r.   zbanklist.htmlz	.*Water.*r8   html5lib)r-   r/   )r   )rD   r   rC   r   r   r   test_fallback_success?  s    z"TestReadHtml.test_fallback_successc             C   s:   t ddd}ttjjdd|d}|j }d|ks6td S )Nz
2000-01-01r   )r   rH   )r   )r   r
   rt   randomZrandnrN   r    )rD   rngrO   r   r   r   r   test_to_html_timestampD  s    z#TestReadHtml.test_to_html_timestampzdisplayed_only,exp0,exp1ZfooNFzfoo  bar  baz  quxc             C   sT   t d}| j||d}tj|d | |d k	r@tj|d | nt|dksPtd S )Na  <html>
          <body>
            <table>
              <tr>
                <td>
                  foo
                  <span style="display:none;text-align:center">bar</span>
                  <span style="display:none">baz</span>
                  <span style="display: none">qux</span>
                </td>
              </tr>
            </table>
            <table style="display: none">
              <tr>
                <td>foo</td>
              </tr>
            </table>
          </body>
        </html>)displayed_onlyr   rg   )r   r   r$   r%   r   r    )rD   r  Zexp0Zexp1r   ra   r   r   r   test_displayed_onlyK  s    	z TestReadHtml.test_displayed_onlyc       
      C   s   t jj|}t jj|d }|jd\}}yt|d}| j|j |ddj }W d Q R X t|d"}| jt	|j |ddj }W d Q R X | j||ddj }	t
j|| t
j||	 W n4 tk
r   t rd|ksd|krtj   Y nX d S )Nr   _rb)r@   r9   Z16Z32)r   r   basenamesplitextsplitr|   r   r}   popr   r$   r%   	Exceptionr   r1   skip)
rD   r   	base_pathrootr  r@   ZfobjZfrom_stringZfrom_file_likefrom_filenamer   r   r   test_encoder  s&    "zTestReadHtml.test_encodec          	   C   sj   | j jjddkrtjd G dd dt}|d}| j |sBttjtdd | j | W d Q R X d S )	Nr/   r8   zNot applicable for lxmlc               @   s   e Zd Zdd ZdS )zFTestReadHtml.test_parse_failure_unseekable.<locals>.UnseekableStringIOc             S   s   dS )NFr   )rD   r   r   r   seekable  s    zOTestReadHtml.test_parse_failure_unseekable.<locals>.UnseekableStringIO.seekableN)__name__
__module____qualname__r  r   r   r   r   UnseekableStringIO  s   r  z?
            <table><tr><td>spam<foobr />eggs</td></tr></table>z#passed a non-rewindable file object)r-   )	r   keywordsgetr1   r  r   r    r2   r5   )rD   r  badr   r   r   test_parse_failure_unseekable  s    
z*TestReadHtml.test_parse_failure_unseekablec             C   s>   G dd d}|d}|d}| j |s,t| j |s:td S )Nc               @   s.   e Zd Zdd Zd
ddZdd Zdd	 ZdS )z9TestReadHtml.test_parse_failure_rewinds.<locals>.MockFilec             S   s   || _ d| _d S )NF)r   at_end)rD   r   r   r   r   __init__  s    zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.__init__Nc             S   s   | j r
dn| j}d| _ |S )Nr   T)r$  r   )rD   sizer   r   r   r   r}     s    z>TestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.readc             S   s
   d| _ d S )NF)r$  )rD   offsetr   r   r   seek  s    z>TestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.seekc             S   s   dS )NTr   )rD   r   r   r   r    s    zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.seekable)N)r  r  r  r%  r}   r(  r  r   r   r   r   MockFile  s   
r)  z/<table><tr><td>spam<br />eggs</td></tr></table>z2<table><tr><td>spam<foobr />eggs</td></tr></table>)r   r    )rD   r)  Zgoodr"  r   r   r   test_parse_failure_rewinds  s
    z'TestReadHtml.test_parse_failure_rewindsc             C   s   G dd dt j}ttjj |dddd}|| j|fd}|| j|fd}|j  |j  x|j s^|j rpq^W d |j	  ko|j	kn  st
d S )Nc                   s   e Zd Z fddZ  ZS )z@TestReadHtml.test_importcheck_thread_safety.<locals>.ErrorThreadc                sB   yt  j  W n( tk
r6 } z|| _W Y d d }~X nX d | _d S )N)superrunr  err)rD   r-  )	__class__r   r   r,    s
    zDTestReadHtml.test_importcheck_thread_safety.<locals>.ErrorThread.run)r  r  r  r,  __classcell__r   r   )r.  r   ErrorThread  s   r0  r   r   r.   zvalid_markup.html)targetr'   )	threadingThreadr   pandasr   r.   r   startis_aliver-  r    )rD   r   r0  r:   Zhelper_thread1Zhelper_thread2r   r   r   test_importcheck_thread_safety  s    
z+TestReadHtml.test_importcheck_thread_safety)Yr  r  r  r1   fixturerE   rG   rR   r$   networkrX   rY   r[   markZslowr\   r`   rb   rc   rf   ri   rk   rl   rm   rn   rp   rs   ru   rw   rx   ry   rz   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r	  r  parametrizer
   r  r  r#  r*  r7  r   r   r   r   r>   W   s   	
		
&2D!
"#r>   )6	functoolsr   	importlibr   r   r   r   r   r   r2  urllib.errorr   Znumpyrt   Znumpy.randomr   r1   Zpandas.compatr   Zpandas.errorsr	   Zpandas.util._test_decoratorsutilZ_test_decoratorstdr4  r
   r   r   r   r   r   Zpandas._testingZ_testingr$   Zpandas.io.commonr   r   r   r   dirname__file__ZHEREr8  r   r*   Z
skip_if_nor4   r7   r;   r:  r;  r   r>   r   r   r   r   <module>   sB    
		