3
qdJH              	   @   sb  d Z ddlZddlmZ ddlZddlmZmZmZ ddl	Z	ddl
Z
ddlZddlmZmZmZmZmZmZmZmZmZmZmZ ddlmZmZmZmZmZ ddlZddl m!Z! ddl"m#Z#m$Z$ dd	l%m&Z& dd
l'm(Z( e$ Z)e*ee e Z+e+j,d er
ddlm-Z- e.dddZ/e!e e!e dddZ0ddddZ1e!e e!e dddZ2dd Z3e!e.dddZ4d6e!ee5 ee5 ee5 eee5ef  dddZ6e5e5dd d!Z7d"d#d$d%d&Z8eee5ee5e5f f  eee5 ee5e5f f d'd(d)Z9e!ee5 ee5 d*d+d,Z:d7e5eee5ee5ef f  e.e.d/d0d1Z;G d2d3 d3ej<eZ=G d4d5 d5ej>Z?dS )8zCommon IO api utilities    N)abc)BufferedIOBaseBytesIO	RawIOBase)IOTYPE_CHECKINGAnyAnyStrDictListMappingOptionalTupleTypeUnion)urljoinurlparseuses_netlocuses_paramsuses_relative)FilePathOrBuffer)_get_lzma_file_import_lzma)import_optional_dependency)is_file_like )IOBase)returnc             C   s   t | tsdS t| jtkS )z
    Check to see if a URL has a valid protocol.

    Parameters
    ----------
    url : str or unicode

    Returns
    -------
    isurl : bool
        If `url` has a valid protocol return True otherwise False.
    F)
isinstancestr	parse_urlscheme_VALID_URLS)url r$   2/tmp/pip-build-7vycvbft/pandas/pandas/io/common.pyis_url1   s    
r&   )filepath_or_bufferr   c             C   s   t | trtjj| S | S )a]  
    Return the argument with an initial component of ~ or ~user
    replaced by that user's home directory.

    Parameters
    ----------
    filepath_or_buffer : object to be converted if possible

    Returns
    -------
    expanded_filepath_or_buffer : an expanded filepath or the
                                  input if not expandable
    )r   r   ospath
expanduser)r'   r$   r$   r%   _expand_userC   s    
r+   c             C   s   t | trtdd S )NzPassing a bool to header is invalid. Use header=None for no header or header=int or list-like of ints to specify the row(s) making up the column names)r   bool	TypeError)headerr$   r$   r%   validate_header_argX   s    
r/   c             C   s.   t | dr| j S t| tjr&t| S t| S )ad  
    Attempt to convert a path-like object to a string.

    Parameters
    ----------
    filepath_or_buffer : object to be converted

    Returns
    -------
    str_filepath_or_buffer : maybe a string version of the object

    Notes
    -----
    Objects supporting the fspath protocol (python 3.6+) are coerced
    according to its __fspath__ method.

    For backwards compatibility with older pythons, pathlib.Path and
    py.path objects are specially coerced.

    Any other object is passed through unchanged, which includes bytes,
    strings, buffers, or anything else that's not even path-like.
    
__fspath__)hasattrr0   r   pathlibPathr   r+   )r'   r$   r$   r%   stringify_patha   s
    
r4   c              O   s   ddl }|jj| |S )z`
    Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of
    the stdlib.
    r   N)urllib.requestrequesturlopen)argskwargsurllibr$   r$   r%   r7      s    r7   )r#   r   c             C   s   t | tod| ko| jd S )zR
    Returns true if the given URL looks like
    something fsspec can handle
    z://http://https://)r;   r<   )r   r   
startswith)r#   r$   r$   r%   is_fsspec_url   s    
r>   )r'   encodingcompressionmodestorage_optionsc             C   s  t | } t| tr\t| r\t| }|jjdd}|dkr<d}t|j }|j	  |||dfS t
| rt| tstt| jdr| jdd} | jdr| jdd} td}g }	y&td	 d
dlm}
m} |
|tg}	W n tk
r   Y nX y(|j| fd|pdi|pi j }W nd t|	k
rt   |dkr:ddi}nt|}d|d< |j| fd|p\di|pfi j }Y nX |||dfS t| tttjfrt| d|dfS t| sdt|  }t|| d|dfS )a  
    If the filepath_or_buffer is a url, translate and return the buffer.
    Otherwise passthrough.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
                         or buffer
    compression : {{'gzip', 'bz2', 'zip', 'xz', None}}, optional
    encoding : the encoding to use to decode bytes, default is 'utf-8'
    mode : str, optional
    storage_options: dict, optional
        passed on to fsspec, if using it; this is not yet accessed by the public API

    Returns
    -------
    Tuple[FilePathOrBuffer, str, str, bool]
        Tuple containing the filepath or buffer, the encoding, the compression
        and should_close.
    zContent-EncodingNgzipTzs3a://zs3://zs3n://fsspecZbotocorer   )ClientErrorNoCredentialsErrorrA   rbZanonFz)Invalid file path or buffer object type: )r4   r   r   r&   r7   headersgetr   readcloser>   AssertionErrorr=   replacer   Zbotocore.exceptionsrE   rF   PermissionErrorImportErroropentupledictbytesmmapr+   r   type
ValueError)r'   r?   r@   rA   rB   reqcontent_encodingreaderrD   Zerr_types_to_retry_with_anonrE   rF   Zfile_objmsgr$   r$   r%   get_filepath_or_buffer   sV    



$

(
r[   )r)   r   c             C   s   ddl m} td|| S )z
    converts an absolute native path to a FILE URL.

    Parameters
    ----------
    path : a path in native format

    Returns
    -------
    a valid FILE URL
    r   )pathname2urlzfile:)r5   r\   r   )r)   r\   r$   r$   r%   file_path_to_url   s    r]   z.gzz.bz2z.zipz.xz)rC   bz2zipxz)r@   r   c             C   s\   t | trPt| }y|jd} W qT tk
rL } ztd|W Y dd}~X qTX ni }| |fS )a  
    Simplifies a compression argument to a compression method string and
    a mapping containing additional arguments.

    Parameters
    ----------
    compression : str or mapping
        If string, specifies the compression method. If mapping, value at key
        'method' specifies compression method.

    Returns
    -------
    tuple of ({compression method}, Optional[str]
              {compression arguments}, Dict[str, str])

    Raises
    ------
    ValueError on mapping missing 'method' key
    methodz.If mapping, compression must have key 'method'N)r   r   rR   popKeyErrorrV   )r@   compression_argserrr$   r$   r%   get_compression_method  s    
rf   )r'   r@   r   c             C   s   |dkrdS |dkrVt | } t| ts*dS x&tj D ]\}}| j j|r4|S q4W dS |tkrb|S d| }ddgtt }|d| 7 }t|dS )a  
    Get the compression method for filepath_or_buffer. If compression='infer',
    the inferred compression method is returned. Otherwise, the input
    compression method is returned unchanged, unless it's invalid, in which
    case an error is raised.

    Parameters
    ----------
    filepath_or_buffer : str or file handle
        File path or object.
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}
        If 'infer' and `filepath_or_buffer` is path-like, then detect
        compression from the following extensions: '.gz', '.bz2', '.zip',
        or '.xz' (otherwise no compression).

    Returns
    -------
    string or None

    Raises
    ------
    ValueError on invalid compression specified.
    NZinferzUnrecognized compression type: z
Valid compression types are )	r4   r   r   _compression_to_extensionitemslowerendswithsortedrV   )r'   r@   	extensionrZ   Zvalidr$   r$   r%   infer_compression.  s     

rm   FT)rA   r@   
memory_mapis_textc             C   s  yddl m} tt|f}W n tk
r6   ttf}Y nX t }	| }
t| } t| t}t	|\}}|rnt
| |}|r|dkr|rtj| |f|}
ntjf d| i|}
n|dkr|rtj| |f|}
ntj| f|}
n|dkrjt| |f|}|	j| |jdkr
|}
n^|jdkr|j }t|d	kr<|j|j }
n,t|dkrZtd
|  ntd| n,|dkrtt| |}
nd| }t||	j|
 nL|r|rt| |||dd}
n"|rt| |ddd}
n
t| |}
|	j|
 |r@|st|
|r@ddlm} ||
||dd}t|
ttfs<|	j| |}
|rt|
dryt|
}|
j  |}
W n tk
r   Y nX |
|	fS )af  
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf : str or file handle
        File path or object.
    mode : str
        Mode to open path_or_buf with.
    encoding : str or None
        Encoding to use.
    compression : str or dict, default None
        If string, specifies compression mode. If dict, value at key 'method'
        specifies compression mode. Compression mode must be one of {'infer',
        'gzip', 'bz2', 'zip', 'xz', None}. If compression mode is 'infer'
        and `filepath_or_buffer` is path-like, then detect compression from
        the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise
        no compression). If dict and compression mode is one of
        {'zip', 'gzip', 'bz2'}, or inferred as one of the above,
        other entries passed as additional compression options.

        .. versionchanged:: 1.0.0

           May now be a dict with key 'method' as compression mode
           and other keys as compression options if compression
           mode is 'zip'.

        .. versionchanged:: 1.1.0

           Passing compression options as keys in dict is now
           supported for compression modes 'gzip' and 'bz2' as well as 'zip'.

    memory_map : boolean, default False
        See parsers._parser_params for more information.
    is_text : boolean, default True
        whether file/buffer is in text format (csv, json, etc.), or in binary
        mode (pickle, etc.).
    errors : str, default 'strict'
        Specifies how encoding and decoding errors are to be handled.
        See the errors argument for :func:`open` for a full list
        of options.

        .. versionadded:: 1.1.0

    Returns
    -------
    f : file-like
        A file-like object.
    handles : list of file-like objects
        A list of file-like object that were opened in this function.
    r   )S3FilerC   fileobjr^   r_   wr   zZero files found in ZIP file z9Multiple files found in ZIP file. Only one file per ZIP: r`   zUnrecognized compression type: r   )r?   errorsnewlinerM   )ru   rv   )TextIOWrapperfileno)Zs3fsrp   r   r   rO   listr4   r   r   rf   rm   rC   rP   GzipFiler^   BZ2File_BytesZipFileappendrA   namelistlenrb   rV   r   lzmaiorw   r1   _MMapWrapperrK   	Exception)Zpath_or_bufrA   r?   r@   rn   ro   ru   rp   Zneed_text_wrappingZhandlesfZis_pathrd   zfZ	zip_namesrZ   rw   gwrappedr$   r$   r%   
get_handled  s~    =








r   c                   sH   e Zd ZdZd
eeee d fddZ fddZe	dd	 Z
  ZS )r|   a  
    Wrapper for standard library class ZipFile and allow the returned file-like
    handle to accept byte strings via `write` method.

    BytesIO provides attributes of file-like object and ZipFile.writestr writes
    bytes strings into a member of the archive.
    N)filerA   archive_namec                s4   |dkr|j dd}|| _t j||tjf| d S )NwbrG   br   )r   rG   )rM   r   super__init__zipfileZIP_DEFLATED)selfr   rA   r   r9   )	__class__r$   r%   r     s    z_BytesZipFile.__init__c                s(   | j }| jd k	r| j}t j|| d S )N)filenamer   r   writestr)r   datar   )r   r$   r%   write$  s    
z_BytesZipFile.writec             C   s
   | j d kS )N)fp)r   r$   r$   r%   closed*  s    z_BytesZipFile.closed)N)__name__
__module____qualname____doc__r   r   r   r   r   propertyr   __classcell__r$   r$   )r   r%   r|     s   r|   c               @   sH   e Zd ZdZedddZedddZd dd	d
ZedddZ	dS )r   a  
    Wrapper for the Python's mmap class so that it can be properly read in
    by Python's csv.reader class.

    Parameters
    ----------
    f : file object
        File object to be mapped onto memory. Must support the 'fileno'
        method or have an equivalent attribute

    )r   c             C   s   t j |j dt jd| _ d S )Nr   )access)rT   rx   ZACCESS_READ)r   r   r$   r$   r%   r   <  s    z_MMapWrapper.__init__)namec             C   s   t | j|S )N)getattrrT   )r   r   r$   r$   r%   __getattr__?  s    z_MMapWrapper.__getattr__)r   c             C   s   | S )Nr$   )r   r$   r$   r%   __iter__B  s    z_MMapWrapper.__iter__c             C   s$   | j j }|jd}|dkr t|S )Nzutf-8r   )rT   readlinedecodeStopIteration)r   Znewbytesrv   r$   r$   r%   __next__E  s
    

z_MMapWrapper.__next__N)
r   r   r   r   r   r   r   r   r   r   r$   r$   r$   r%   r   /  s
   r   )NNNN)NNFTN)@r   r^   collectionsr   rC   r   r   r   r   rT   r(   r2   typingr   r   r   r	   r
   r   r   r   r   r   r   urllib.parser   r   r    r   r   r   r   Zpandas._typingr   Zpandas.compatr   r   Zpandas.compat._optionalr   Zpandas.core.dtypes.commonr   r   setr"   discardr   r,   r&   r+   r/   r4   r7   r>   r   r[   r]   rg   rf   rm   r   ZipFiler|   Iteratorr   r$   r$   r$   r%   <module>   sZ   4


   0[" 7    * $!