3
d7'                 @   s   d dl Z d dlmZ d dlmZ d dlmZ yd dlZW n ek
rL   Y nX d dl	m
Z
 G dd de
Zdd	 Zd
d ZG dd dZG dd dZdS )    N)abstractmethod)sqrt)stdout)ClusterIc               @   sd   e Zd ZdZdddZdddZedd	 Zd
d Zedd Z	dd Z
dd Zdd Zdd ZdS )VectorSpaceClustererz
    Abstract clusterer which takes tokens and maps them into a vector space.
    Optionally performs singular value decomposition to reduce the
    dimensionality.
    FNc             C   s   d| _ || _|| _dS )a)  
        :param normalise:       should vectors be normalised to length 1
        :type normalise:        boolean
        :param svd_dimensions:  number of dimensions to use in reducing vector
                                dimensionsionality with SVD
        :type svd_dimensions:   int
        N)_Tt_should_normalise_svd_dimensions)selfZ	normaliseZsvd_dimensions r   1/tmp/pip-build-v9q4h5k9/nltk/nltk/cluster/util.py__init__   s    zVectorSpaceClusterer.__init__c       
         s   t |dkst jr&tt j|} jrĈ jt |d k rtjj	tj
tj|\}}}|d  j tj jtj }|d d d  jf }|d  jd d f }	tj
tj||	}tj
| _ j|| |r fdd|D S d S )Nr   c                s   g | ]} j |qS r   )classify).0vector)r
   r   r   
<listcomp>?   s    z0VectorSpaceClusterer.cluster.<locals>.<listcomp>)lenAssertionErrorr   listmap
_normaliser	   numpyZlinalgZsvdZ	transposearrayidentityZfloat64dotr   cluster_vectorspace)
r
   vectorsZassign_clusterstraceudZvtSTZDtr   )r
   r   cluster(   s    zVectorSpaceClusterer.clusterc             C   s   dS )zD
        Finds the clusters using the given set of vectors.
        Nr   )r
   r   r   r   r   r   r   A   s    z(VectorSpaceClusterer.cluster_vectorspacec             C   s<   | j r| j|}| jd k	r(tj| j|}| j|}| j|S )N)r   r   r   r   r   classify_vectorspaceZcluster_name)r
   r   r"   r   r   r   r   G   s    


zVectorSpaceClusterer.classifyc             C   s   dS )zN
        Returns the index of the appropriate cluster for the vector.
        Nr   )r
   r   r   r   r   r#   O   s    z)VectorSpaceClusterer.classify_vectorspacec             C   s4   | j r| j|}| jd k	r(tj| j|}| j||S )N)r   r   r   r   r   likelihood_vectorspace)r
   r   labelr   r   r   
likelihoodU   s
    

zVectorSpaceClusterer.likelihoodc             C   s   | j |}||krdS dS )zP
        Returns the likelihood of the vector belonging to the cluster.
        g      ?g        )r#   )r
   r   r"   Z	predictedr   r   r   r$   \   s    
z+VectorSpaceClusterer.likelihood_vectorspacec             C   s,   | j r| j|}| jdk	r(tj| j|}|S )zU
        Returns the vector after normalisation and dimensionality reduction
        N)r   r   r   r   r   )r
   r   r   r   r   r   c   s
    

zVectorSpaceClusterer.vectorc             C   s   |t tj|| S )z7
        Normalises the vector to unit length.
        )r   r   r   )r
   r   r   r   r   r   m   s    zVectorSpaceClusterer._normalise)FN)FF)__name__
__module____qualname____doc__r   r"   r   r   r   r#   r&   r$   r   r   r   r   r   r   r      s   


r   c             C   s   | | }t tj||S )z}
    Returns the euclidean distance between vectors u and v. This is equivalent
    to the length of the vector (u - v).
    )r   r   r   )r   vZdiffr   r   r   euclidean_distancet   s    r,   c             C   s0   dt j| |tt j| | tt j||   S )zw
    Returns 1 minus the cosine of the angle between vectors v and u. This is
    equal to ``1 - (u.v / |u||v|)``.
       )r   r   r   )r   r+   r   r   r   cosine_distance}   s    r.   c               @   s2   e Zd ZdZdd ZdddZdd Zd	d
 ZdS )_DendrogramNodezTree node of a dendrogram.c             G   s   || _ || _d S )N)_value	_children)r
   valuechildrenr   r   r   r      s    z_DendrogramNode.__init__Tc             C   sD   | j r.g }x| j D ]}|j|j| qW |S |r:| jgS | gS d S )N)r1   extendleavesr0   )r
   valuesr5   childr   r   r   r5      s    z_DendrogramNode.leavesc             C   s   | j | fg}xpt||k r||j \}}|js<|j||f P x4|jD ]*}|jr`|j|j |f qD|jd|f qDW |j  qW g }x|D ]\}}|j|j  qW |S )Nr   )r0   r   popr1   pushappendsortr5   )r
   nqueueprioritynoder7   groupsr   r   r   r@      s    z_DendrogramNode.groupsc             C   s   t | j|jdk S )Nr   )r.   r0   )r
   Z
comparatorr   r   r   __lt__   s    z_DendrogramNode.__lt__N)T)r'   r(   r)   r*   r   r5   r@   rA   r   r   r   r   r/      s
   
r/   c               @   s@   e Zd ZdZg fddZdd Zdd Zg fdd	Zd
d ZdS )
Dendrograma  
    Represents a dendrogram, a tree with a specified branching order.  This
    must be initialised with the leaf items, then iteratively call merge for
    each branch. This class constructs a tree representing the order of calls
    to the merge function.
    c             C   s(   dd |D | _ tj| j | _d| _dS )zs
        :param  items: the items at the leaves of the dendrogram
        :type   items: sequence of (any)
        c             S   s   g | ]}t |qS r   )r/   )r   itemr   r   r   r      s    z'Dendrogram.__init__.<locals>.<listcomp>r-   N)_itemscopy_original_items_merge)r
   itemsr   r   r   r      s    zDendrogram.__init__c                sl   t |dkstt jf fdd|D  }  jd7  _| j|d < x|dd D ]} j|= qXW dS )a=  
        Merges nodes at given indices in the dendrogram. The nodes will be
        combined which then replaces the first node specified. All other nodes
        involved in the merge will be removed.

        :param  indices: indices of the items to merge (at least two)
        :type   indices: seq of int
           c             3   s   | ]} j | V  qd S )N)rD   )r   i)r
   r   r   	<genexpr>   s    z#Dendrogram.merge.<locals>.<genexpr>r-   r   N)r   r   r/   rG   rD   )r
   indicesr?   rJ   r   )r
   r   merge   s    	zDendrogram.mergec             C   s6   t | jdkr"t| jf| j }n
| jd }|j|S )z
        Finds the n-groups of items (leaves) reachable from a cut at depth n.
        :param  n: number of groups
        :type   n: int
        r-   r   )r   rD   r/   rG   r@   )r
   r<   rootr   r   r   r@      s    
zDendrogram.groupsc                s@  d\}}}t | jdkr,t| jf| j }n
| jd }| j}|rF|}ndd |D }ttt |d d t d dfd
d	 dd }|j|fg}	 fdd|D }
x`|	r|	j	 \}}t
tdd |j}t
t|j|}|rt|}t|}xtt |D ]}|| |krz||kr@| |d	| n,||kr\| ||d	 n| |||  ||
|< n:||  ko|kn  r| ||| n||
|  qW |d x(|jD ]}|jr|	j|j|f qW |	j  x|
D ]}|| qW |d qW |djfdd|D  |d dS )z
        Print the dendrogram in ASCII art to standard out.

        :param leaf_labels: an optional list of strings to use for labeling the
                            leaves
        :type leaf_labels: list
        +-|r-   r   c             S   s   g | ]}d |j  qS )z%s)r0   )r   leafr   r   r   r      s    z#Dendrogram.show.<locals>.<listcomp>rI    c                s    |  |  |  S )Nr   )Zcentreleftright)lhalfrhalfr   r   format   s    zDendrogram.show.<locals>.formatc             S   s   t j|  d S )N)r   write)strr   r   r   display   s    z Dendrogram.show.<locals>.displayc                s   g | ]} d qS )rS   r   )r   rR   )rX   r   r   r     s    c             S   s   | j dd S )NFr   )r5   )cr   r   r   <lambda>  s    z!Dendrogram.show.<locals>.<lambda>
 c             3   s   | ]}|j  V  qd S )N)center)r   rC   )widthr   r   rK   #  s    z"Dendrogram.show.<locals>.<genexpr>N)rO   rP   rQ   )rS   rS   )r   rD   r/   rG   rF   maxr   intr0   r8   r   r1   indexminranger:   r;   join)r
   Zleaf_labelsZJOINZHLINKZVLINKrN   r5   Zlast_rowr[   r=   Z	verticalsr>   r?   Zchild_left_leafrL   min_idxZmax_idxrJ   r7   Zverticalr   )rX   rV   rW   ra   r   show   sV    






zDendrogram.showc             C   sB   t | jdkr"t| jf| j }n
| jd }|jd}dt | S )Nr-   r   Fz<Dendrogram with %d leaves>)r   rD   r/   rG   r5   )r
   rN   r5   r   r   r   __repr__&  s
    

zDendrogram.__repr__N)	r'   r(   r)   r*   r   rM   r@   ri   rj   r   r   r   r   rB      s   	IrB   )rE   abcr   mathr   sysr   r   ImportErrorZnltk.cluster.apir   r   r,   r.   r/   rB   r   r   r   r   <module>   s   _	+