3
Ud                 @   s~   d Z ddlmZ ddlmZ ddlZddlZddlZ	ddlm
Z
 ddljZdd Zejjdd	 Zd
d Zejjdd ZdS )zd
Tests multithreading behaviour for reading and
parsing files for each parser defined in parsers.py
    )BytesIO)
ThreadPoolN)	DataFramec             C   s\   t tjj| dtdd}d|d< d|d< d|d< tjd| dd	|d
< tj| dd|d< |S )z
    Construct a DataFrame for testing.

    Parameters
    ----------
    num_rows : int
        The number of rows for our DataFrame.

    Returns
    -------
    df : DataFrame
       Zabcde)columnsZfoobarZbazz20000101 09:00:00s)ZperiodsfreqdateZint64)Zdtypeint)r   nprandomZrandlistpdZ
date_rangeZarange)num_rowsdf r   \/var/www/html/virt/lib64/python3.6/site-packages/pandas/tests/io/parser/test_multi_thread.py_construct_dataframe   s    r   c       	         sl   | }d d} fddt |D }dd |D }td}|j|j|}|d }x|D ]}tj|| qTW d S )Ni'  d   c                s(   g | ] }d j dd t D j qS )
c             S   s&   g | ]}|d d|d d|d qS )d,r   ).0ir   r   r   
<listcomp>.   s    zCtest_multi_thread_string_io_read_csv.<locals>.<listcomp>.<listcomp>)joinrangeencode)r   _)max_row_ranger   r   r   .   s   z8test_multi_thread_string_io_read_csv.<locals>.<listcomp>c             S   s   g | ]}t |qS r   )r   )r   br   r   r   r   1   s       r   )r   r   mapread_csvtmassert_frame_equal)	all_parsersparserZ	num_filesZbytes_to_dffilespoolresultsZfirst_resultresultr   )r    r   $test_multi_thread_string_io_read_csv&   s    

r-   c                sp   fdd} fddt D }td}|j||}|d j}x|dd D ]
}	||	_qTW tj|}
|
S )	a  
    Generate a DataFrame via multi-thread.

    Parameters
    ----------
    parser : BaseParser
        The parser object to use for reading the data.
    path : str
        The location of the CSV file to read.
    num_rows : int
        The number of rows to read per task.
    num_tasks : int
        The number of tasks to use for reading this DataFrame.

    Returns
    -------
    df : DataFrame
    c                sB   | \}}|s" j dd|dgdS  j ddt|d |dgdS )aj  
        Create a reader for part of the CSV.

        Parameters
        ----------
        arg : tuple
            A tuple of the following:

            * start : int
                The starting row to start for parsing CSV
            * nrows : int
                The number of rows to read.

        Returns
        -------
        df : DataFrame
        r   r
   )	index_colheadernrowsparse_datesN   	   )r.   r/   Zskiprowsr0   r1   )r$   r   )argstartr0   )r(   pathr   r   readerQ   s    
z0_generate_multi_thread_dataframe.<locals>.readerc                s    g | ]} |    fqS r   r   )r   r   )r   	num_tasksr   r   r   t   s    z4_generate_multi_thread_dataframe.<locals>.<listcomp>)	processesr   r2   N)r   r   r#   r   r   concat)r(   r6   r   r8   r7   tasksr*   r+   r/   rfinal_dataframer   )r   r8   r(   r6   r    _generate_multi_thread_dataframe=   s    #



r>   c             C   sV   d}d}| }d}t |}tj|*}|j| t||||}tj|| W d Q R X d S )N   i z__thread_pool_reader__.csv)r   r%   Zensure_cleanZto_csvr>   r&   )r'   r8   r   r(   	file_namer   r6   r=   r   r   r   )test_multi_thread_path_multipart_read_csv   s    
rA   )__doc__ior   Zmultiprocessing.poolr   Znumpyr   ZpytestZpandasr   r   Zpandas._testingZ_testingr%   r   markZslowr-   r>   rA   r   r   r   r   <module>   s   
F