3
Ud=n                 @   s  d Z ddlZddlmZ ddlmZ ddlZddlmZ ddl	Z
ddlZddljjZddlZddljZddlmZmZmZmZmZ yddlZdZW n ek
r   dZY nX yddlZdZW n ek
r   dZY nX ej j!d	Z"ej#ej$d
ej j%e dddej$dej j%e dddgddd Z&ej#dd Z'ej#dd Z(ej#dd Z)ej#dd Z*ej#dd Z+d8ddZ,d d! Z-d"d# Z.d$d% Z/d&d' Z0d(d) Z1d*d+ Z2d,d- Z3d.d/ Z4G d0d1 d1Z5G d2d3 d3e5Z6G d4d5 d5e5Z7G d6d7 d7e5Z8dS )9z test parquet compat     N)LooseVersion)BytesIO)catch_warnings)FastParquetImplPyArrowImpl
get_engineread_parquet
to_parquetTFz4ignore:RangeIndex.* is deprecated:DeprecationWarningfastparquetzfastparquet is not installed)reason)Zmarkspyarrowzpyarrow is not installed)paramsc             C   s   | j S )N)param)request r   P/var/www/html/virt/lib64/python3.6/site-packages/pandas/tests/io/test_parquet.pyengine-   s    r   c               C   s   t stjd dS )Nzpyarrow is not installedr   )_HAVE_PYARROWpytestskipr   r   r   r   paA   s    
r   c               C   s   t stjd dS )Nzfastparquet is not installedr
   )_HAVE_FASTPARQUETr   r   r   r   r   r   fpH   s    
r   c               C   s   t jdddgddS )N         foo)AB)pd	DataFramer   r   r   r   	df_compatO   s    r!   c           	   C   sD   t jtdttddtjdddddd	dgt jd
ddd} | S )Nabcr      g      @g      @float64)dtypeTF20130101r   )periods)abdef)r   r    listrangenparange
date_range)dfr   r   r   df_cross_compatT   s    r3   c               C   s   t jtddtjdgdd dgdddgddd	gttd
dtjddjdtjdddddtjdgdddgt jdddt j	dt j
t j	dgdS )Nr"   r(   cs   foos   bars   bazr   barbazr   r#   r      u1g      @g      @r$   )r%   g       @g      @TFr&   )r'   Z20130103)stringZstring_with_nanZstring_with_nonebytesunicodeintZuintfloatZfloat_with_nanbooldatetimeZdatetime_with_nat)r   r    r-   r/   nanr.   r0   astyper1   	TimestampZNaTr   r   r   r   df_fullf   s    

rC   r   c	       
   	      s~   p
ddipi dkr |r4|d< |d<  fdd}	dkrrt j |	| W dQ R X n|	| dS )a  Verify parquet serializer and deserializer produce the same results.

    Performs a pandas to disk and disk to pandas round trip,
    then compares the 2 resulting DataFrames to verify equality.

    Parameters
    ----------
    df: Dataframe
    engine: str, optional
        'pyarrow' or 'fastparquet'
    path: str, optional
    write_kwargs: dict of str:str, optional
    read_kwargs: dict of str:str, optional
    expected: DataFrame, optional
        Expected deserialization result, otherwise will be equal to `df`
    check_names: list of str, optional
        Closed set of column names to be compared
    check_like: bool, optional
        If True, ignore the order of index & columns.
    repeat: int, optional
        How many times to repeat the test
    compressionNr   c                sX   xRt | D ]F}jf tdd tf}W d Q R X tj| d q
W d S )NT)record)check_names
check_like)r.   r	   r   r   tmassert_frame_equal)repeat_actual)rG   rF   r2   expectedpathread_kwargswrite_kwargsr   r   compare   s    z!check_round_trip.<locals>.compare)rH   ensure_clean)
r2   r   rN   rP   rO   rM   rF   rG   rJ   rQ   r   )rG   rF   r2   rM   rN   rO   rP   r   check_round_trip~   s    !

rS   c             C   s&   t jt t| dd W d Q R X d S )Nr   r5   )r   raises
ValueErrorrS   )r!   r   r   r   test_invalid_engine   s    rV   c          	   C   s$   t jdd t|  W d Q R X d S )Nzio.parquet.enginer   )r   option_contextrS   )r!   r   r   r   r   test_options_py   s    rX   c          	   C   s$   t jdd t|  W d Q R X d S )Nzio.parquet.enginer
   )r   rW   rS   )r!   r   r   r   r   test_options_fp   s    rY   c          	   C   s$   t jdd t|  W d Q R X d S )Nzio.parquet.engineauto)r   rW   rS   )r!   r   r   r   r   r   test_options_auto   s    r[   c             C   s  t tdtstt tdts$ttjdd< t tdtsDtt tdtsVtt tdtshtW d Q R X tjdd< t tdtstt tdtstt tdtstW d Q R X tjdd> t tdtstt tdtstt tdtstW d Q R X d S )Nr   r
   zio.parquet.enginerZ   )
isinstancer   r   AssertionErrorr   r   rW   )r   r   r   r   r   test_options_get_engine   s    r^   c              C   s0  ddl m}  | jd}| jd}ts(dnttjt|k }tsBdnttjt|k }to\| }tof| }| or| r,|rd| d}t	j
t|d td	 W d Q R X n&d
}t	j
t|d td	 W d Q R X |rd| d}t	j
t|d td	 W d Q R X n&d}t	j
t|d td	 W d Q R X d S )Nr   )VERSIONSr   r
   FzPandas requires version .z. or newer of .pyarrow.)matchrZ   z%Missing optional dependency .pyarrow.z. or newer of .fastparquet.z)Missing optional dependency .fastparquet.)Zpandas.compat._optionalr_   getr   r   r   __version__r   r
   r   rT   ImportErrorr   )r_   Z
pa_min_verZ
fp_min_verZhave_pa_bad_versionZhave_fp_bad_versionZhave_usable_paZhave_usable_fpr`   r   r   r   "test_get_engine_auto_error_message   s0    



rd   c             C   sj   | }t j T}|j||d d t||d}t j|| t||ddgd}t j||ddg  W d Q R X d S )N)r   rD   )r   r(   r*   )r   columns)rH   rR   r	   r   rI   )r3   r   r   r2   rN   resultr   r   r   test_cross_engine_pa_fp  s    
rg   c             C   s   t tjdk r&t tjdkr&tjd | }tj j}|j||d d tddD t	||d}tj
|| t	||dd	gd
}tj
||dd	g  W d Q R X W d Q R X d S )Nz0.15z0.13z`Reading fastparquet with pyarrow in 0.14 fails: https://issues.apache.org/jira/browse/ARROW-6492)r   rD   T)rE   )r   r(   r*   )r   re   )r   r   rb   r   ZxfailrH   rR   r	   r   r   rI   )r3   r   r   r2   rN   rf   r   r   r   test_cross_engine_fp_pa!  s    
rh   c               @   s   e Zd Zdd ZdS )Basec             C   s>   t j ,}tj| t|||d d W d Q R X W d Q R X d S )N)rD   )rH   rR   r   rT   r	   )selfr2   r   excrN   r   r   r   check_error_on_write:  s    
zBase.check_error_on_writeN)__name__
__module____qualname__rl   r   r   r   r   ri   9  s   ri   c               @   sr   e Zd Zdd Zdd Zdd Zejjddd	d
dgdd Z	dd Z
dd Zdd Zdd Zdd Zdd ZdS )	TestBasicc             C   sH   xBt jdddgddt jdtjdddggD ]}| j||t q.W d S )Nr   r   r   r   r&   )r   SeriesrB   r/   arrayrl   rU   )rj   r   objr   r   r   
test_errorB  s    zTestBasic.test_errorc             C   s6   t jtdttddd}ddg|_t|| d S )Nr"   r   r#   )r9   r<   r   r5   )r   r    r-   r.   re   rS   )rj   r   r2   r   r   r   test_columns_dtypesL  s    
zTestBasic.test_columns_dtypesc             C   s   t jtdttddd}ddg|_| j||t ddg|_| j||t tjdddddtjdddddg|_| j||t d S )	Nr"   r   r#   )r9   r<   r   s   foos   bari  )r   r    r-   r.   re   rl   rU   r?   )rj   r   r2   r   r   r   test_columns_dtypes_invalidS  s    

z%TestBasic.test_columns_dtypes_invalidrD   Ngzipsnappybrotlic             C   sP   |dkrt jd n|dkr&t jd tjddddgi}t||d|id d S )	Nrx   ry   r   r   r   r   rD   )rP   )r   importorskipr   r    rS   )rj   r   rD   r2   r   r   r   test_compressione  s    
zTestBasic.test_compressionc             C   sJ   t jtdttddd}t jdtdi}t|||ddgid d S )Nr"   r   r#   )r9   r<   r9   re   )rM   rO   )r   r    r-   r.   rS   )rj   r   r2   rM   r   r   r   test_read_columnsq  s    zTestBasic.test_read_columnsc             C   s   |dk}t jddddgi}t|| dddgt jdddtd	dddgg}x:|D ]2}||_t|t jrv|jjd |_t|||d
 qRW dddg|_d|j_	t|| d S )Nr
   r   r   r   r   r#   r&   )r'   r"   )rF   r   r   )
r   r    rS   r1   r-   indexr\   ZDatetimeIndex
_with_freqname)rj   r   rF   r2   Zindexesr}   r   r   r   test_write_indexz  s    

zTestBasic.test_write_indexc             C   s>   |}t jddddgi}t jjddd	g}||_t|| d S )
Nr   r   r   r   r(   r)   )r(   r   )r(   r   )r)   r   )r   r    
MultiIndexfrom_tuplesr}   rS   )rj   r   r   r2   r}   r   r   r   test_write_multiindex  s
    zTestBasic.test_write_multiindexc             C   s<   t jjdd	d
g}t jtjjdd|d}| j||t d S )Nr(   r   r   r)   r#   r   )re   )r(   r   )r(   r   )r)   r   )	r   r   r   r    r/   randomrandnrl   rU   )rj   r   Z
mi_columnsr2   r   r   r   test_write_column_multiindex  s    z&TestBasic.test_write_column_multiindexc             C   s   |}t jdddd}t jtjjdt| dtdd}t jj	d	d
g|gddgd}|j
d d}x@||gD ]4}||_t|| t||dddgi|ddg d qjW d S )Nz01-Jan-2018z01-Dec-2018ZMS)freqr   r   ABC)re   ZLevel1ZLevel2leveldate)namesre   r   r   )rO   rM   )r   r1   r    r/   r   r   lenr-   r   Zfrom_productcopyr}   rS   )rj   r   r   datesr2   Zindex1index2r}   r   r   r   test_multiindex_with_columns  s    $
z&TestBasic.test_multiindex_with_columnsc          	   C   s   t jdddgdddgd}d dd	}|jd
d}t||||d t jdddgdddgddddgd}t||||d ddddddddgddddddddgg}t jttddd tdD d|d}|jd
d}t||||d d S )Nr   r   r   qrs)r(   r)   F)rD   r}   T)Zdrop)rP   rM   ZzyxZwvuZtsr)r}   r5   r6   r   Zquxonetwo   c             S   s   g | ]
}| qS r   r   ).0ir   r   r   
<listcomp>  s    z7TestBasic.test_write_ignoring_index.<locals>.<listcomp>)r   r   )r   r    Zreset_indexrS   r-   r.   )rj   r   r2   rP   rM   Zarraysr   r   r   test_write_ignoring_index  s    
"&z#TestBasic.test_write_ignoring_index)rm   rn   ro   rt   ru   rv   r   markparametrizer{   r|   r   r   r   r   r   r   r   r   r   rp   A  s   
		rp   c               @   s  e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	e
jdejjddgg gdd Zeje
jddd Ze
jddd Ze
jddd Zdd Zdd Zdd  Zd!d" Ze
jdd#d$d%d& Ze
jdd'd$d(d) Ze
jdd*d$d+d, Ze
jdd-d$d.d/ Zd0S )1TestParquetPyArrowc             C   sB   |}t jdddd}|jd }||d< dd dg|d< t|| d S )Nr&   r   zEurope/Brussels)r'   tzdatetime_tzTZbool_with_none)r   r1   r~   rS   )rj   r   rC   r2   dtir   r   r   
test_basic  s    
zTestParquetPyArrow.test_basicc             C   s<   |}t jdddd|d< t|||ddg dddgid	 d S )
Nr&   r   zEurope/Brussels)r'   r   r   r9   r<   re   )rM   rO   )r   r1   rS   )rj   r   rC   r2   r   r   r   test_basic_subset_columns  s    
z,TestParquetPyArrow.test_basic_subset_columnsc             C   s6   t jtjdjddtddj }| j||t d S )N   r#   r   aaa)re   )	r   r    r/   r0   reshaper-   r   rl   rU   )rj   r   r2   r   r   r   test_duplicate_columns  s    $z)TestParquetPyArrow.test_duplicate_columnsc             C   s   t tjt dk r:tjdtjddddi}| j||t tjdtjdddi}| j||t	 tjddd	d
gi}| j||t d S )Nz
0.15.1.devr(   2013Mr   )r   r'   z1 day)r'   r   g       @)
r   r   rb   r   r    period_rangerl   	Exceptiontimedelta_rangeNotImplementedError)rj   r   r2   r   r   r   test_unsupported  s    z#TestParquetPyArrow.test_unsupportedc             C   s   t j }t jtd|d< t jddddd dgt jdddgd|d< t jddddddgddd	gd
d|d< ttjtdkrt|| n|j	t
}t|||d d S )NZabcdefr(   r5   r   r6   )r%   r)   r4   r*   T)
categoriesZorderedz0.15.0)rM   )r   r    Categoricalr-   ZCategoricalDtyper   r   rb   rS   rA   object)rj   r   r2   rM   r   r   r   test_categorical  s    "
z#TestParquetPyArrow.test_categoricalc             C   s2   t jd}|j }t|d}t||d||d d S )Ns3fs)
filesystemzpandas-test/pyarrow.parquet)rN   rO   rP   )r   rz   ZS3FileSystemdictrS   )rj   r!   s3_resourcer   r   Zs3kwr   r   r   test_s3_roundtrip_explicit_fs  s    

z0TestParquetPyArrow.test_s3_roundtrip_explicit_fsc             C   s   t ||dd d S )Nz s3://pandas-test/pyarrow.parquet)rN   )rS   )rj   r!   r   r   r   r   r   test_s3_roundtrip(  s    z$TestParquetPyArrow.test_s3_roundtripr   partition_colr   c          	   C   sp   |j  }ttjtdko*ttjtdk }|rP|r:d}nd}|| j|||< t|||d|d dddd	 d S )
Nz1.0.0z2.0.0Zint32categoryzs3://pandas-test/parquet_dir)partition_colsrD   Tr   )rM   rN   rP   rG   rJ   )r   r   r   rb   rA   rS   )rj   r!   r   r   r   Zexpected_dfZpa10Zpartition_col_typer   r   r   test_s3_roundtrip_for_dir,  s"    
z,TestParquetPyArrow.test_s3_roundtrip_for_dirr   c             C   s   d}t j|}tj|| d S )Nzfhttps://raw.githubusercontent.com/pandas-dev/pandas/master/pandas/tests/io/data/parquet/simple.parquet)r   r   rH   rI   )rj   r!   urlr2   r   r   r   test_parquet_read_from_urlO  s    
z-TestParquetPyArrow.test_parquet_read_from_urlc             C   s*   t  }|j| tj|}tj|| d S )N)r   r	   r   r   rH   rI   )rj   r!   bufferZdf_from_bufr   r   r   test_read_file_like_obj_supportY  s    

z2TestParquetPyArrow.test_read_file_like_obj_supportc             C   sd   |j dd |j dd tjtdd tjd W d Q R X tjtdd |jd W d Q R X d S )NHOMEZTestingUserUSERPROFILEz.*TestingUser.*)r`   z~/file.parquet)Zsetenvr   rT   OSErrorr   r   r	   )rj   r!   Zmonkeypatchr   r   r   test_expand_user`  s    z#TestParquetPyArrow.test_expand_userc             C   st   ddg}|}t j V}|j||d d dd lj}|j|dd}t|jjdksRt	|jjt
|ksft	W d Q R X d S )Nr>   r<   )r   rD   r   F)validate_schemar   )rH   ensure_clean_dirr	   pyarrow.parquetparquetParquetDatasetr   
partitionspartition_namesr]   set)rj   r   rC   r   r2   rN   pqdatasetr   r   r   test_partition_cols_supportedi  s    

z0TestParquetPyArrow.test_partition_cols_supportedc       	      C   sv   d}|g}|}t j V}|j||d d dd lj}|j|dd}t|jjdksTt	|jjt
|ksht	W d Q R X d S )Nr>   )r   rD   r   F)r   r   )rH   r   r	   r   r   r   r   r   r   r]   r   )	rj   r   rC   r   Zpartition_cols_listr2   rN   r   r   r   r   r   test_partition_cols_stringu  s    

z-TestParquetPyArrow.test_partition_cols_stringc             C   s   t j }t|| d S )N)r   r    rS   )rj   r   r2   r   r   r   test_empty_dataframe  s    z'TestParquetPyArrow.test_empty_dataframec             C   sV   dd l }tjdddgi}|j|jd|j dg}|jt}t||d|i|d d S )Nr   xr   )typeschema)rP   rM   )	r   r   r    r   fieldZbool_rA   r>   rS   )rj   r   r   r2   r   Zout_dfr   r   r   test_write_with_schema  s
    
z)TestParquetPyArrow.test_write_with_schemaz0.15.0)min_versionc             C   s   t jt jdddgddt jdddgddt jdd dgd	dd
}ttjtdkrZ|}n(|j|jjd|j	jd|j
jdd
}t|||d t jdt jdddd gddi}ttjtdkr|}n|j|jjdd}t|||d d S )Nr   r   r   ZInt64)r%   ZUInt32r(   r4   r9   )r(   r)   r4   z0.16.0Zint64r   )rM   r$   )r(   )r   r    rq   r   r   rb   Zassignr(   rA   r)   r4   rS   )rj   r   r2   rM   r   r   r    test_additional_extension_arrays  s    $ z3TestParquetPyArrow.test_additional_extension_arraysz0.16.0c             C   s(   t jdt jddddi}t|| d S )Nr*   z
2012-01-01r   D)r'   r   )r   r    r   rS   )rj   r   r2   r   r   r   test_additional_extension_types  s    z2TestParquetPyArrow.test_additional_extension_typesz0.14c             C   s0   t jdt jddddi}t||ddid d S )	Nr(   z
2017-01-01Z1n
   )r   r'   versionz2.0)rP   )r   r    r1   rS   )rj   r   r2   r   r   r   test_timestamp_nanoseconds  s    z-TestParquetPyArrow.test_timestamp_nanosecondsz0.17c             C   s^   t jdttddi}tj $}|j|| t||dgdd}W d Q R X t|dksZt	d S )	Nr(   r   r   ==F)filtersZuse_legacy_datasetr   )r(   r   r   )
r   r    r-   r.   rH   rR   r	   r   r   r]   )rj   r   r2   rN   rf   r   r   r   test_filter_row_groups  s    
z)TestParquetPyArrow.test_filter_row_groupsN)rm   rn   ro   r   r   r   r   r   r   r   td
skip_if_nor   r   r   r   rH   networkr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r     s*   "		r   c               @   s   e Zd Zejddddd Zejjdddd	 Z	d
d Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdS )TestParquetFastParquetr
   z0.3.2)r   c             C   sF   |}t jdddd}|jd }||d< t jddd|d< t|| d S )	Nr&   r   z
US/Eastern)r'   r   r   z1 day)r'   	timedelta)r   r1   r~   r   rS   )rj   r   rC   r2   r   r   r   r   r     s    
z!TestParquetFastParquet.test_basicznot supported)r   c             C   s6   t jtjdjddtddj }| j||t d S )Nr   r#   r   r   )re   )	r   r    r/   r0   r   r-   r   rl   rU   )rj   r   r2   r   r   r   r     s    $z-TestParquetFastParquet.test_duplicate_columnsc             C   s@   t jddd dgi}t jddtjdgidd}t|||d d S )	Nr(   TFg      ?g        Zfloat16)r%   )rM   )r   r    r/   r@   rS   )rj   r   r2   rM   r   r   r   test_bool_with_none  s    z*TestParquetFastParquet.test_bool_with_nonec             C   sN   t jdt jddddi}| j||t t jddddgi}| j||t d S )Nr(   r   r   r   )r   r'   r   g       @)r   r    r   rl   rU   )rj   r   r2   r   r   r   r     s    z'TestParquetFastParquet.test_unsupportedc             C   s&   t jdt jtdi}t|| d S )Nr(   r"   )r   r    r   r-   rS   )rj   r   r2   r   r   r   r     s    z'TestParquetFastParquet.test_categoricalc             C   sf   dt tddi}tj|}tj (}|j||d dd t||dgd}W d Q R X t|dksbt	d S )	Nr(   r   r   r   )rD   Zrow_group_offsets==)r   )r(   r   r   )
r-   r.   r   r    rH   rR   r	   r   r   r]   )rj   r   r*   r2   rN   rf   r   r   r   r     s    

z-TestParquetFastParquet.test_filter_row_groupsc             C   s   t ||dd d S )Nz$s3://pandas-test/fastparquet.parquet)rN   )rS   )rj   r!   r   r   r   r   r   r     s    z(TestParquetFastParquet.test_s3_roundtripc             C   sl   ddg}|}t j N}|j|d|d d tjj|s8tdd l}|j|dj	}t
|dks^tW d Q R X d S )Nr>   r<   r
   )r   r   rD   r   Fr   )rH   r   r	   osrN   existsr]   r
   ParquetFilecatsr   )rj   r   rC   r   r2   rN   r
   actual_partition_colsr   r   r   r     s    
z4TestParquetFastParquet.test_partition_cols_supportedc             C   sh   d}|}t j N}|j|d|d d tjj|s4tdd l}|j|dj	}t
|dksZtW d Q R X d S )Nr>   r
   )r   r   rD   r   Fr   )rH   r   r	   r   rN   r   r]   r
   r   r   r   )rj   r   rC   r   r2   rN   r
   r   r   r   r   r     s    
z1TestParquetFastParquet.test_partition_cols_stringc             C   sl   ddg}|}t j N}|j|dd |d tjj|s8tdd l}|j|dj	}t
|dks^tW d Q R X d S )Nr>   r<   r
   )r   rD   partition_onr   Fr   )rH   r   r	   r   rN   r   r]   r
   r   r   r   )rj   r   rC   r   r2   rN   r
   r   r   r   r   test_partition_on_supported  s    
z2TestParquetFastParquet.test_partition_on_supportedc             C   sN   ddg}|}t jt. tj }|j|dd ||d W d Q R X W d Q R X d S )Nr>   r<   r
   )r   rD   r   r   )r   rT   rU   rH   r   r	   )rj   r   rC   r   r2   rN   r   r   r   3test_error_on_using_partition_cols_and_partition_on/  s    
zJTestParquetFastParquet.test_error_on_using_partition_cols_and_partition_onc             C   s*   t j }|j }d|j_t|||d d S )Nr}   )rM   )r   r    r   r}   r   rS   )rj   r   r2   rM   r   r   r   r   =  s    z+TestParquetFastParquet.test_empty_dataframeN)rm   rn   ro   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r     s   

r   )NNNNNTFr   )9__doc__r?   Zdistutils.versionr   ior   r   warningsr   Znumpyr/   r   Zpandas.util._test_decoratorsutilZ_test_decoratorsr   Zpandasr   Zpandas._testingZ_testingrH   Zpandas.io.parquetr   r   r   r   r	   r   r   rc   r
   r   r   filterwarningsZ
pytestmarkZfixturer   Zskipifr   r   r   r!   r3   rC   rS   rV   rX   rY   r[   r^   rd   rg   rh   ri   rp   r   r   r   r   r   r   <module>   sv   


	       
3+  z