3
­dz  ã               @   s   d dl Z G dd„ dƒZdS )é    Nc               @   s(   e Zd ZdZdd„ Zdd„ Zdd„ ZdS )	ÚDependencyEvaluatora;  
    Class for measuring labelled and unlabelled attachment score for
    dependency parsing. Note that the evaluation ignores punctuation.

    >>> from nltk.parse import DependencyGraph, DependencyEvaluator

    >>> gold_sent = DependencyGraph("""
    ... Pierre  NNP     2       NMOD
    ... Vinken  NNP     8       SUB
    ... ,       ,       2       P
    ... 61      CD      5       NMOD
    ... years   NNS     6       AMOD
    ... old     JJ      2       NMOD
    ... ,       ,       2       P
    ... will    MD      0       ROOT
    ... join    VB      8       VC
    ... the     DT      11      NMOD
    ... board   NN      9       OBJ
    ... as      IN      9       VMOD
    ... a       DT      15      NMOD
    ... nonexecutive    JJ      15      NMOD
    ... director        NN      12      PMOD
    ... Nov.    NNP     9       VMOD
    ... 29      CD      16      NMOD
    ... .       .       9       VMOD
    ... """)

    >>> parsed_sent = DependencyGraph("""
    ... Pierre  NNP     8       NMOD
    ... Vinken  NNP     1       SUB
    ... ,       ,       3       P
    ... 61      CD      6       NMOD
    ... years   NNS     6       AMOD
    ... old     JJ      2       NMOD
    ... ,       ,       3       AMOD
    ... will    MD      0       ROOT
    ... join    VB      8       VC
    ... the     DT      11      AMOD
    ... board   NN      9       OBJECT
    ... as      IN      9       NMOD
    ... a       DT      15      NMOD
    ... nonexecutive    JJ      15      NMOD
    ... director        NN      12      PMOD
    ... Nov.    NNP     9       VMOD
    ... 29      CD      16      NMOD
    ... .       .       9       VMOD
    ... """)

    >>> de = DependencyEvaluator([parsed_sent],[gold_sent])
    >>> las, uas = de.eval()
    >>> las
    0.6...
    >>> uas
    0.8...
    >>> abs(uas - 0.8) < 0.00001
    True
    c             C   s   || _ || _dS )z‰
        :param parsed_sents: the list of parsed_sents as the output of parser
        :type parsed_sents: list(DependencyGraph)
        N)Ú_parsed_sentsÚ_gold_sents)ÚselfZparsed_sentsZ
gold_sents© r   ú3/tmp/pip-build-v9q4h5k9/nltk/nltk/parse/evaluate.pyÚ__init__G   s    zDependencyEvaluator.__init__c                s*   dddddddh‰ dj ‡ fd	d
„|D ƒƒS )z©
        Function to remove punctuation from Unicode string.
        :param input: the input string
        :return: Unicode string after remove all punctuation
        ZPcZPdZPsZPeÚPiZPfZPoÚ c             3   s    | ]}t j|ƒˆ kr|V  qd S )N)ÚunicodedataÚcategory)Ú.0Úx)Úpunc_catr   r   ú	<genexpr>V   s    z4DependencyEvaluator._remove_punct.<locals>.<genexpr>)Újoin)r   ZinStrr   )r   r   Ú_remove_punctO   s    z!DependencyEvaluator._remove_punctc       
      C   s  t | jƒt | jƒkrtdƒ‚d}d}d}xÖtt | jƒƒD ]Ä}| j| j}| j| j}t |ƒt |ƒkrltdƒ‚xŽ|jƒ D ]‚\}}|| }	|d dkr”qv|d |	d kr¬tdƒ‚| j|d ƒdkrÀqv|d7 }|d	 |	d	 krv|d7 }|d
 |	d
 krv|d7 }qvW q8W || || fS )z†
        Return the Labeled Attachment Score (LAS) and Unlabeled Attachment Score (UAS)

        :return : tuple(float,float)
        zE Number of parsed sentence is different with number of gold sentence.r   z!Sentences must have equal length.ÚwordNz!Sentence sequence is not matched.r
   é   ÚheadÚrel)Úlenr   r   Ú
ValueErrorÚrangeZnodesÚitemsr   )
r   ZcorrZcorrLÚtotalÚiZparsed_sent_nodesZgold_sent_nodesZparsed_node_addressZparsed_nodeZ	gold_noder   r   r   ÚevalX   s2    zDependencyEvaluator.evalN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r   r   r   r      s   9	r   )r   r   r   r   r   r   Ú<module>	   s   