3
qdd*              	   @   s   d Z ddlmZmZmZmZmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZmZ ddlmZmZmZ ed	d
ddZG dd	 d	ZG dd deZG dd deZdee
e eee ee eee  dddZdedddZdS )z parquet compat     )AnyAnyStrDictListOptional)catch_warnings)FilePathOrBuffer)import_optional_dependency)AbstractMethodError)	DataFrame
get_option)_expand_userget_filepath_or_bufferis_fsspec_urlBaseImpl)enginereturnc             C   s   | dkrt d} | dkrzttg}d}xF|D ]>}y| S  tk
rf } z|dt| 7 }W Y dd}~X q*X q*W td| | dkrt S | dkrt S td	dS )
z return our implementation autozio.parquet.engine z
 - NzUnable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:pyarrowfastparquetz.engine must be one of 'pyarrow', 'fastparquet')r   PyArrowImplFastParquetImplImportErrorstr
ValueError)r   Zengine_classesZ
error_msgsZengine_classerr r   3/tmp/pip-build-7vycvbft/pandas/pandas/io/parquet.py
get_engine   s"    
&	r   c               @   s6   e Zd ZeedddZedddZd	ddZdS )
r   )dfc             C   sL   t | tstd| jjdkr&tdtdd | jjD }|sHtdd S )	Nz+to_parquet only supports IO with DataFramesstringemptyz%parquet must have string column namesc             s   s    | ]}|d k	rt |tV  qd S )N)
isinstancer   ).0namer   r   r   	<genexpr>>   s    z.BaseImpl.validate_dataframe.<locals>.<genexpr>z!Index level names must be strings>   r!   r"   )r#   r   r   columnsZinferred_typeallindexnames)r    Zvalid_namesr   r   r   validate_dataframe2   s    
zBaseImpl.validate_dataframec             K   s   t | d S )N)r
   )selfr    pathcompressionkwargsr   r   r   writeC   s    zBaseImpl.writeNc             K   s   t | d S )N)r
   )r,   r-   r'   r/   r   r   r   readF   s    zBaseImpl.read)N)__name__
__module____qualname__staticmethodr   r+   r0   r1   r   r   r   r   r   1   s   c               @   sJ   e Zd Zdd Zd
eee ee ee	 ee
e  dddZddd	ZdS )r   c             C   s&   t ddd dd l}dd l}|| _d S )Nr   z(pyarrow is required for parquet support.)extrar   )r	   Zpyarrow.parquetZpandas.core.arrays._arrow_utilsapi)r,   r   pandasr   r   r   __init__K   s
    
zPyArrowImpl.__init__snappyN)r    r-   r.   r)   partition_colsc             K   s   | j | d|jdd i}|d k	r*||d< | jjj|f|}t|rvd|krvtd dd l}	|	jj	|\}
}|
|d< nt
|}|d k	r| jjj||f||d| n| jjj||fd|i| d S )NZschemaZpreserve_index
filesystemfsspecr   )r.   r;   r.   )r+   popr7   ZTableZfrom_pandasr   r	   fsspec.corecore	url_to_fsr   parquetZwrite_to_datasetZwrite_table)r,   r    r-   r.   r)   r;   r/   Zfrom_pandas_kwargstabler=   fsr   r   r   r0   V   s&    	


zPyArrowImpl.writec       	      K   s   t |r6d|kr6td dd l}|jj|\}}d}n|jdd }d}t|}|sbt|\}}}}d|d< | jj	j
|f||d|j }|r|j  |S )Nr<   r=   r   FTZuse_pandas_metadata)r'   r<   )r   r	   r?   r@   rA   r>   r   r   r7   rB   Z
read_table	to_pandasclose)	r,   r-   r'   r/   r=   rD   Zshould_close_resultr   r   r   r1   }   s     zPyArrowImpl.read)r:   NN)N)r2   r3   r4   r9   r   r   r   r   r   boolr   r0   r1   r   r   r   r   r   J   s
     ,!r   c               @   s.   e Zd Zdd Zd
edddZddd	ZdS )r   c             C   s   t ddd}|| _d S )Nr   z,fastparquet is required for parquet support.)r6   )r	   r7   )r,   r   r   r   r   r9      s    
zFastParquetImpl.__init__r:   N)r    c                s   | j | d|kr$|d k	r$tdnd|kr6|jd}|d k	rFd|d< t|rhtd  fdd|d< nt|\}}}}td	d
$ | jj||f|||d| W d Q R X d S )Npartition_onzYCannot use both partition_on and partition_cols. Use partition_cols for partitioning dataZhiveZfile_schemer=   c                s    j | dj  S )Nwb)open)r-   rG   )r=   r   r   <lambda>   s    z'FastParquetImpl.write.<locals>.<lambda>	open_withT)record)r.   Zwrite_indexrJ   )	r+   r   r>   r   r	   r   r   r7   r0   )r,   r    r-   r.   r)   r;   r/   rG   r   )r=   r   r0      s(    	

zFastParquetImpl.writec                s^   t |r.td  fdd}| jj||d}nt|\}}}}| jj|}|jf d|i|S )Nr=   c                s    j | dj  S )Nrb)rL   )r-   rG   )r=   r   r   rM      s    z&FastParquetImpl.read.<locals>.<lambda>)rN   r'   )r   r	   r7   ZParquetFiler   rE   )r,   r-   r'   r/   rN   Zparquet_filerG   r   )r=   r   r1      s    zFastParquetImpl.read)r:   NN)N)r2   r3   r4   r9   r   r0   r1   r   r   r   r   r      s
     %r   r   r:   N)r    r-   r   r.   r)   r;   c             K   s4   t |tr|g}t|}|j| |f|||d|S )a  
    Write a DataFrame to the parquet format.

    Parameters
    ----------
    df : DataFrame
    path : str or file-like object
        If a string, it will be used as Root Directory path
        when writing a partitioned dataset. By file-like object,
        we refer to objects with a write() method, such as a file handler
        (e.g. via builtin open function) or io.BytesIO. The engine
        fastparquet does not accept file-like objects.

        .. versionchanged:: 0.24.0

    engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.
    compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
        Name of the compression to use. Use ``None`` for no compression.
    index : bool, default None
        If ``True``, include the dataframe's index(es) in the file output. If
        ``False``, they will not be written to the file.
        If ``None``, similar to ``True`` the dataframe's index(es)
        will be saved. However, instead of being saved as values,
        the RangeIndex will be stored as a range in the metadata so it
        doesn't require much space and is faster. Other indexes will
        be included as columns in the file output.

        .. versionadded:: 0.24.0

    partition_cols : str or list, optional, default None
        Column names by which to partition the dataset.
        Columns are partitioned in the order they are given.
        Must be None if path is not a string.

        .. versionadded:: 0.24.0

    kwargs
        Additional keyword arguments passed to the engine
    )r.   r)   r;   )r#   r   r   r0   )r    r-   r   r.   r)   r;   r/   implr   r   r   
to_parquet   s    4
rR   )r   c             K   s   t |}|j| fd|i|S )a  
    Load a parquet object from the file path, returning a DataFrame.

    Parameters
    ----------
    path : str, path object or file-like object
        Any valid string path is acceptable. The string could be a URL. Valid
        URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.parquet``.
        A file URL can also be a path to a directory that contains multiple
        partitioned parquet files. Both pyarrow and fastparquet support
        paths to directories as well as file URLs. A directory path could be:
        ``file://localhost/path/to/tables`` or ``s3://bucket/partition_dir``

        If you want to pass in a path object, pandas accepts any
        ``os.PathLike``.

        By file-like object, we refer to objects with a ``read()`` method,
        such as a file handler (e.g. via builtin ``open`` function)
        or ``StringIO``.
    engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.
    columns : list, default=None
        If not None, only these columns will be read from the file.
    **kwargs
        Any additional kwargs are passed to the engine.

    Returns
    -------
    DataFrame
    r'   )r   r1   )r-   r   r'   r/   rQ   r   r   r   read_parquet  s    $rS   )r   r:   NN)r   N)__doc__typingr   r   r   r   r   warningsr   Zpandas._typingr   Zpandas.compat._optionalr	   Zpandas.errorsr
   r8   r   r   Zpandas.io.commonr   r   r   r   r   r   r   r   rI   rR   rS   r   r   r   r   <module>   s    "LD   .;