3
Ud#                 @   s   d Z ddlZddlmZ ddlZddljjZddl	m
Z
mZmZ ddlmZmZmZmZ dZeddd	Zd
ded
feeee edddZdefedddZeedddZded
feeedddZdS )z"
data hash pandas / numpy objects
    N)Optional)is_categorical_dtypeis_extension_array_dtypeis_list_like)ABCDataFrameABCIndexClassABCMultiIndex	ABCSeriesZ0123456789123456)	num_itemsc             C   s   yt | }W n  tk
r,   tjg tjdS X tj|g| } tjd}tj|tjd }xBt| D ]6\}}|| }||N }||9 }|tjd| | 7 }qdW |d |kst	d|tjd7 }|S )z
    Parameters
    ----------
    arrays : generator
    num_items : int

    Should be the same as CPython's tupleobject.c
    )dtypeiCB ixV4 iXB    zFed in wrong num_itemsi| )
nextStopIterationnparrayuint64	itertoolschainZ
zeros_like	enumerateAssertionError)arraysr
   firstZmultoutiaZ	inverse_i r   L/var/www/html/virt/lib64/python3.6/site-packages/pandas/core/util/hashing.py_combine_hash_arrays   s    	
r   Tutf8)indexencodinghash_key
categorizec                sx  ddl m} dkrtttr8|tdddS ttrptj j	ddd}||ddd}ntt
rtj j	ddd}|rȇ fd	d
dD }tj|g|}t|d}||jddd}nttrbdd
 j D }	tj}
|rD fdd
dD }|
d7 }
tj|	|}dd
 |D }	t|	|
}||jddd}ntdt |S )aX  
    Return a data hash of the Index/Series/DataFrame.

    Parameters
    ----------
    index : bool, default True
        Include the index in the hash (if Series/DataFrame).
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    Series of uint64, same length as the object
    r   )SeriesNr   F)r   copy)r$   )r   r   r$   c             3   s$   | ]}t jd  djV  qdS )F)r   r    r!   r"   N)hash_pandas_objectr   _values).0_)r"   r    r!   objr   r   	<genexpr>e   s   z%hash_pandas_object.<locals>.<genexpr>   c             s   s   | ]\}}t |jV  qd S )N)
hash_arrayr&   )r'   r(   Zseriesr   r   r   r*   t   s    c             3   s$   | ]}t jd  djV  qdS )F)r   r    r!   r"   N)r%   r   r&   )r'   r(   )r"   r    r!   r)   r   r   r*   x   s   r   c             s   s   | ]
}|V  qd S )Nr   )r'   xr   r   r   r*      s    zUnexpected type for hashing )N)N)pandasr#   _default_hash_key
isinstancer   hash_tuplesr   r,   r&   astyper	   r   r   r   r   r   itemslencolumns	TypeErrortype)r)   r   r    r!   r"   r#   hZ
index_iterr   hashesr
   Zindex_hash_generator_hashesr   )r"   r    r!   r)   r   r%   7   s>    







r%   )r!   c                s   d}t trgd}nts*tdddlm m} t tsN|j fddt	j
D fdd	D }t|t}|r|d }|S )
a  
    Hash an MultiIndex / list-of-tuples efficiently

    Parameters
    ----------
    vals : MultiIndex, list-of-tuples, or single tuple
    encoding : str, default 'utf8'
    hash_key : str, default _default_hash_key

    Returns
    -------
    ndarray of hashed values array
    FTz'must be convertible to a list-of-tuplesr   )Categorical
MultiIndexc                s(   g | ] } j | j| d ddqS )FT)orderedfastpath)codesZlevels)r'   level)r;   valsr   r   
<listcomp>   s   zhash_tuples.<locals>.<listcomp>c             3   s   | ]}t | d V  qdS ))r    r!   N)_hash_categorical)r'   cat)r    r!   r   r   r*      s    zhash_tuples.<locals>.<genexpr>)r0   tupler   r6   r.   r;   r<   r   from_tuplesrangeZnlevelsr   r4   )rA   r    r!   Zis_tupler<   r9   r8   r   )r;   r    r!   rA   r   r1      s     


r1   )r    r!   c             C   sl   t j| jj}t|||dd}| j }t|r<|j| j}nt j	t|dd}|j
 rht jt jj||< |S )a  
    Hash a Categorical by hashing its categories, and then mapping the codes
    to the hashes

    Parameters
    ----------
    c : Categorical
    encoding : str
    hash_key : str

    Returns
    -------
    ndarray of hashed values array, same size as len(c)
    F)r"   r   )r   )r   Zasarray
categoriesr&   r,   Zisnar4   Ztaker?   ZzerosanyZiinfor   max)cr    r!   valueshashedmaskresultr   r   r   rC      s    	rC   )r    r!   r"   c             C   s  t | dstd| j}t|r,t| ||S t|rF| j \} }| j}tj|tj	rtt
tj| dt
tj|   S t|tr| jd} nt|jtjtjfr| jdjddd} nt|jtjr|jdkr| jd	| jj jd} n|r2d
dlm}m}m} || dd\}	}
||	||
ddd}t|||S ytj| ||} W n0 tk
rt   tj| jtjt||} Y nX | | d? N } | tjd9 } | | d? N } | tjd9 } | | d? N } | S )a9  
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray, Categorical
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    1d uint64 numpy array of hash values, same length as the vals
    r   zmust pass a ndarray-like   u8i8F)r$      ur   )r;   Index	factorize)sortT)r=   r>      l   e9z    l   b&&&	    ) hasattrr6   r   r   rC   r   Z_values_for_factorizer   Z
issubdtypeZ
complex128r,   realimagr0   boolr2   
issubclassr7   Z
datetime64Ztimedelta64viewnumberitemsizer.   r;   rU   rV   hashingZhash_object_arraystrobjectr   )rA   r    r!   r"   r   r(   r;   rU   rV   r?   rH   rD   r   r   r   r,      s@    
 
r,   )__doc__r   typingr   Znumpyr   Zpandas._libs.hashingZ_libsrc   Zpandas.core.dtypes.commonr   r   r   Zpandas.core.dtypes.genericr   r   r   r	   r/   intr   r^   rd   r%   r1   rC   r,   r   r   r   r   <module>   s"   R+(