3
d.v                 @   s  d dl Z d dlmZ d dlZd dlmZ d dlmZmZ d dl	m
Z
mZmZmZmZ d dlmZmZmZmZ dddd	d
ddZdddgZG dd dZG dd deZG dd dZG dd deZG dd deZG dd deZd ddZedk re  dS )!    N)chain)Counter)drtlinearlogic)AbstractVariableExpression
ExpressionLambdaExpressionVariableVariableExpression)BigramTaggerRegexpTaggerTrigramTaggerUnigramTaggerex_quant
univ_quantZdef_artZno_quant)aZanZeveryZthenodefaultnmodvmodpunctc               @   sp   e Zd ZdddZdd Zdd Zdd	 Zd
d ZdddZdd Z	dd Z
dd Zdd Zdd Zdd ZdS )GlueFormulaNc             C   s   |s
t  }t|tr"tj|| _n$t|tr4|| _ntd||jf t|trbtj	 j
|| _n&t|tjrv|| _ntd||jf || _d S )Nz1Meaning term neither string or expression: %s, %sz.Glue term neither string or expression: %s, %s)set
isinstancestrr   
fromstringmeaningRuntimeError	__class__r   LinearLogicParserparseglueindices)selfr   r!   r"    r$   -/tmp/pip-build-v9q4h5k9/nltk/nltk/sem/glue.py__init__%   s"    


zGlueFormula.__init__c             C   s   | j |j @ r&tjd|  d| dn| j |j B }ytj| j|j|j }W nF tjk
r } z(tjd| j  d|j  d|W Y dd}~X nX |j}|rx4| jj jjddd D ]}| j	t
d| |}qW | jj|}| j|||S )zyself = (\x.(walk x), (subj -o f))
        arg  = (john        ,  subj)
        returns ((walk john),          f)
        'z' applied to 'z'.  Indices are not disjoint.N   zv%s)r"   r   LinearLogicApplicationExceptionApplicationExpressionr!   simplifyr   
antecedentZdependenciesmake_LambdaExpressionr	   applytor   )r#   argZreturn_indicesZreturn_glueeZarg_meaning_abstracteddepZreturn_meaningr$   r$   r%   r/   ?   s&    zGlueFormula.applytoc             C   s   t |S )N)r
   )r#   namer$   r$   r%   make_VariableExpression`   s    z#GlueFormula.make_VariableExpressionc             C   s
   t ||S )N)r   )r#   variabletermr$   r$   r%   r.   c   s    z!GlueFormula.make_LambdaExpressionc             C   sD   t |tstt |jtst| j| j|jj| jtj	|j
| j
S )N)r   r   AssertionErrorr   r   r   r.   r5   r   ImpExpressionr!   )r#   otherr$   r$   r%   lambda_abstractf   s
    zGlueFormula.lambda_abstractc             C   s>   |s
t  }| jj j|| j\}}|| j| j||j hg S )z)From Iddo Lev's PhD Dissertation p108-109)r   r!   r,   Zcompile_posr   r   get)r#   counterZcompiled_glueZ	new_formsr$   r$   r%   compilen   s    
zGlueFormula.compilec             C   s   | j | jj | jj | jS )N)r   r   r,   r!   r"   )r#   r$   r$   r%   r,   y   s    zGlueFormula.simplifyc             C   s$   | j |j ko"| j|jko"| j|jkS )N)r   r   r!   )r#   r9   r$   r$   r%   __eq__~   s    zGlueFormula.__eq__c             C   s
   | |k S )Nr$   )r#   r9   r$   r$   r%   __ne__   s    zGlueFormula.__ne__c             C   s   t | t |k S )N)r   )r#   r9   r$   r$   r%   __lt__   s    zGlueFormula.__lt__c             C   sR   t | jtst| j d| j }| jrN|ddjdd t| jD  d 7 }|S )Nz : z : {z, c             s   s   | ]}t |V  qd S )N)r   ).0indexr$   r$   r%   	<genexpr>   s    z&GlueFormula.__str__.<locals>.<genexpr>})r   r"   r   r7   r   r!   joinsorted)r#   accumr$   r$   r%   __str__   s    $zGlueFormula.__str__c             C   s   d|  S )Nz%sr$   )r#   r$   r$   r%   __repr__   s    zGlueFormula.__repr__)N)N)__name__
__module____qualname__r&   r/   r4   r.   r:   r=   r,   r>   r?   r@   rH   rI   r$   r$   r$   r%   r   $   s   
!
	r   c               @   s   e Zd Zd"ddZd#ddZdd Zd$d
dZdd Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zd d! ZdS )%GlueDictNc             C   s   || _ || _| j  d S )N)filenamefile_encoding	read_file)r#   rN   encodingr$   r$   r%   r&      s    zGlueDict.__init__Tc             C   s~  |r| j   ytjj| jd| jd}W nZ tk
r } z>ytjjd| j d| jd}W n tk
rn   |Y nX W Y d d }~X nX |j }x|D ]}|j }t	|sq|d dkrq|j
dd}g }d}d}	d}
d }t	|dkrxt|d D ]\}}|d	kr"|dkr|d }	|d7 }q|d
krr|d8 }|dkr|d |	|
 }|d |
d | }|j||g q|dkr|dkr|}
q|dkr|dkrtd| P qW t	|dkr|d jdd }|d jd}||krt }n$tdd |d || j
dD }yH|d jd	}|d jd
}|d d | j }|d |d | }W n   |d j }d }Y nX || kri | |< |d kr*|rx| | D ]P}|| | krg | | |< | | | }| | | j| | | | j| qW n,d | | krg | | d < | | d  j| q|| | krDg | | |< |rd| | | j| | |  | | | j| qW d S )Ntext)formatrQ   zfile:r   #z :    r(   (),z&Formula syntax is incorrect for entry []c             s   s   | ]}|j  V  qd S )N)strip)rA   rr$   r$   r%   rC      s    z%GlueDict.read_file.<locals>.<genexpr>)clearnltkdataloadrN   rO   LookupError
splitlinesr[   lensplit	enumerateappendr   rB   	frozensetextend)r#   Zempty_firstcontentsr1   lineslinepartsZglue_formulasZparen_countZtuple_startZtuple_commarelationshipsicZmeaning_termZ	glue_termZ	rel_startZrel_endZstart_inheritanceZend_inheritanceZsemZ	supertypeZrelsr!   r$   r$   r%   rP      s    










"


zGlueDict.read_filec             C   s   d}x| D ]}d| }x| | D ]}d}x~| | | D ]n}|dkrP||d 7 }n|dt |d  7 }|d| 7 }|r|t | | | kr|d| 7 }|d7 }|d7 }q6W q W q
W |S )	N z%sr(   z:  rU   z : %s
)rc   )r#   rG   posZstr_posZrelsetrn   gfr$   r$   r%   rH     s    
zGlueDict.__str__Fc             C   s   |d krH|j d }ttj|d j }|j |d  }| j||t |S | j|||}x:tj|d j D ]$}	|j |	 }
|j| j||
|| qjW |S )Nr   deps)	nodeslistr   from_iterablevaluesto_glueformula_listr   lookuprh   )r#   depgraphnoder<   verbosetopZdepListrootglueformulasZdep_idxr2   r$   r$   r%   rz   &  s    

zGlueDict.to_glueformula_listc             C   s   | j |}d }x|D ]}|| kr| | }P qW |d kr:g S | j|| | j|||}t|sztd|d |d |d f | j||d |||S )NzKThere is no GlueDict entry for sem type of '%s' with tag '%s', and rel '%s'wordtagrel)get_semtypesadd_missing_dependencies_lookup_semtype_optionrc   KeyError#get_glueformulas_from_semtype_entry)r#   r}   r|   r<   Zsemtype_namessemtyper3   r{   r$   r$   r%   r{   7  s     

zGlueDict.lookupc             C   sb   |d j  }|dkr^|j|d  }| jd||}|d }|d j|g  |d | j|d  d S )Nr   mainheadsubjru   address)lowerrv   lookup_unique
setdefaultrf   )r#   r}   r|   r   Zheadnoder   Zrelationr$   r$   r%   r   Q  s    z!GlueDict.add_missing_dependenciesc                s   t  fddtj|d j D }y|| }W nl tk
r   t  }x2t|d h D ] }t|t|krV||k rV|}qVW |sd |krd }nd S || }Y nX |S )Nc             3   s6   | ].} j | d  j tkr j | d  j V  qdS )r   N)rv   r   OPTIONAL_RELATIONSHIPS)rA   r2   )r|   r$   r%   rC   ^  s   z2GlueDict._lookup_semtype_option.<locals>.<genexpr>ru   )rg   r   rx   ry   r   r   rc   )r#   r   r}   r|   rm   r{   
best_matchZrelset_optionr$   )r|   r%   r   \  s"    
zGlueDict._lookup_semtype_optionc             C   s`   |d j  }|d j  }|dkr>|tkr2t| gS td gS n|d	krR|d |gS |d gS dS )
zj
        Based on the node, return a list of plausible semtypes in order of
        plausibility.
        r   r   specr   r   r   r   N)r   r   )r   SPEC_SEMTYPES)r#   r}   r   r   r$   r$   r%   r   z  s    
zGlueDict.get_semtypesc             C   s|   g }| j  }xj|D ]b\}}	|| j|||	}
t|s<||
_n| t|d  |
_| j|
j|||j |
_|j|
 qW |S )Nr(   )get_GlueFormula_factoryget_meaning_formularc   r   initialize_labelsr!   r;   rf   )r#   r{   r   r}   r|   r<   r   ZglueFormulaFactoryr   r!   rt   r$   r$   r%   r     s    z,GlueDict.get_glueformulas_from_semtype_entryc             C   s   |j dd}|j d|S )z
        :param generic: A meaning formula string containing the
            parameter "<word>"
        :param word: The actual word to be replace "<word>"
        .rp   z<word>)replace)r#   Zgenericr   r$   r$   r%   r     s    zGlueDict.get_meaning_formulac             C   sp   t |tjrD| j|jj |||}|d j r8tj|S tj|S n(tj	| j
|j|||| j
|j|||S d S )Nr   )r   r   ZAtomicExpressionfind_label_namer3   r   isupperr
   ZConstantExpressionr8   r   r-   Z
consequent)r#   exprr}   r|   unique_indexr3   r$   r$   r%   r     s    
zGlueDict.initialize_labelsc       	      C   s<  yf|j d}|d | }||d d  }|dkrJ| j||j|d  ||S | j|| j|||||S W n tk
r6   | j|}|dkr|S |dkrd| S |dkrd	| S |dkr| j|j|d  S |d
kr|j  | S |dkr | j| jd||S |dkr| j| jd||S | j| j|||S Y nX d S )Nr   r(   superr   fvz%svr\   z%srvarr   ZconjabZconjb)rB   r   rv   r   
ValueError	get_labelupper)	r#   r3   r}   r|   r   dotZ
before_dotZ	after_dotZlblr$   r$   r%   r     s:    




zGlueDict.find_label_namec             C   sp   |d }dddddddd	d
dddddddddddddddddg|d  }t |d }|dkrh|t| S |S dS ) z
        Pick an alphabetic character as identifier for an entity in the model.

        :param value: where to index into the list of characters
        :type value: int
        r   r   ghrn   jklmnopqr\   stur   wxyzr   r   ro   dr1   r(      r   N)intr   )r#   r}   valueletternumr$   r$   r%   r     s@    
zGlueDict.get_labelc                sr    fddt j|d j D }t|dkrDtdj|d n*t|dkrftdj|d n|d S d	S )
z\
        Lookup 'key'. There should be exactly one item in the associated relation.
        c                s0   g | ](} j | d  j j kr j | qS )r   )rv   r   )rA   r2   )r|   r   r$   r%   
<listcomp>  s   z*GlueDict.lookup_unique.<locals>.<listcomp>ru   r   z#'{}' doesn't contain a feature '{}'r   r(   z&'{}' should only have one feature '{}'N)r   rx   ry   rc   r   rS   )r#   r   r}   r|   ru   r$   )r|   r   r%   r     s    zGlueDict.lookup_uniquec             C   s   t S )N)r   )r#   r$   r$   r%   r     s    z GlueDict.get_GlueFormula_factory)N)T)NNF)rJ   rK   rL   r&   rP   rH   rz   r{   r   r   r   r   r   r   r   r   r   r   r$   r$   r$   r%   rM      s   

u
	$+rM   c               @   sh   e Zd ZdddZdddZdd Zd	d
 Zdd Zdd Zdd Z	dd Z
dd Zdd Zdd ZdS )GlueNFc             C   sH   || _ || _|| _ddlm} | | _|r2|| _ntjj	ddd| _d S )Nr   )Prover9grammarssample_grammarszglue.semtype)
r~   remove_duplicates	depparserr^   r   proversemtype_fileospathrE   )r#   r   r   r   r~   r   r$   r$   r%   r&     s    zGlue.__init__c             C   s6   |r| j j| n | j jtjjtjjddd d S )Nr   r   zglue_train.conll)	r   ZtrainZtrain_from_filer^   r_   findr   r   rE   )r#   Z	depgraphsr$   r$   r%   train_depparser/  s
    zGlue.train_depparserc             C   s,   g }x"| j |D ]}|j| j| qW |S )N)parse_to_compiledrh   get_readings)r#   sentencereadingsagendar$   r$   r%   parse_to_meaning9  s    zGlue.parse_to_meaningc             C   s  g }t |}t }t }x|r |j }|jj }t|tjrx|D ]}y|t|jtjrf|jj	}	ntj
 }	|jj||	 xH|| D ]<}
|j|
j@ sy|j|j|
 W q tjk
r   Y qX qW W qH tjk
r   Y qHX qHW y||j j| W n" tk
r   |g||j< Y nX qx|D ]}x|| D ]}ynt|jtjrR|jj	}	ntj
 }	|j||	 |j|j@ sy|j|j| W n tjk
r   Y nX W n tjk
r   Y nX q2W q$W y|| j| W q tk
r   |g||< Y qX qW x>|D ]6}x.|| D ]"}t |j|kr| j|| qW qW x>|D ]6}x.|| D ]"}t |j|krV| j|| qVW qHW |S )N)rc   dictpopr!   r,   r   r   r8   r+   bindingsZBindingDictr-   Zunifyr"   rf   r/   r*   ZUnificationExceptionr   _add_to_reading_list)r#   r   r   Zagenda_lengthZatomicsZ
nonatomicscurZ	glue_simpkeyr   ZatomicZ	nonatomicentryrt   r$   r$   r%   r   ?  sn    









zGlue.get_readingsc             C   st   d}| j r`xT|D ]L}y|j|j| jr,d}P W q tk
rZ } ztd| W Y d d }~X qX qW |rp|j|j d S )NTFz2Error when checking logical equality of statements)r   equivr   r   	Exceptionprintrf   )r#   ZglueformulaZreading_listZadd_readingreadingr1   r$   r$   r%   r     s    
 zGlue._add_to_reading_listc                s*    fdd j |D } fdd|D S )Nc                s   g | ]} j |qS r$   )depgraph_to_glue)rA   Zdg)r#   r$   r%   r     s    z*Glue.parse_to_compiled.<locals>.<listcomp>c                s   g | ]} j |qS r$   )gfl_to_compiled)rA   gfl)r#   r$   r%   r     s    )	dep_parse)r#   r   Zgflsr$   )r#   r%   r     s    zGlue.parse_to_compiledc             C   sH   | j dkr&ddlm} || j d| _ | j js6| j  | j j|| jdS )z
        Return a dependency graph for the sentence.

        :param sentence: the sentence to be parsed
        :type sentence: list(str)
        :rtype: DependencyGraph
        Nr   )
MaltParser)tagger)r~   )r   
nltk.parser   get_pos_taggerZ_trainedr   r    r~   )r#   r   r   r$   r$   r%   r     s    

zGlue.dep_parsec             C   s   | j  j|S )N)get_glue_dictrz   )r#   r|   r$   r$   r%   r     s    zGlue.depgraph_to_gluec             C   s
   t | jS )N)rM   r   )r#   r$   r$   r%   r     s    zGlue.get_glue_dictc             C   sP   t  }g }x|D ]}|j|j| qW | jrLtd x|D ]}t| q<W |S )NzCompiled Glue Premises:)r   rh   r=   r~   r   )r#   r   index_counterZreturn_listrt   Zcgfr$   r$   r%   r     s    

zGlue.gfl_to_compiledc             C   sj   ddl m} tdddddd d!d"d#g	}|jdd}t||d}t||d}t||d}td$d%g|d}|S )&Nr   )brown^-?[0-9]+(\.[0-9]+)?$CD(The|the|A|a|An|an)$AT.*able$JJ.*ness$NN.*ly$RB.*s$NNS.*ing$VBG.*ed$VBD.*news)
categories)backoff(A|a|An|an)$r   (Every|every|All|all)$r   )r   r   )r   r   )r   r   )r   r   )r   r   )r   r   )r   r   )r   r   )r   r   )r   r   )r   r   )Znltk.corpusr   r   Ztagged_sentsr   r   r   )r#   r   Zregexp_taggerZbrown_trainZunigram_taggerZbigram_taggerZtrigram_taggerZmain_taggerr$   r$   r%   r     s&    zGlue.get_pos_tagger)NFNF)N)rJ   rK   rL   r&   r   r   r   r   r   r   r   r   r   r   r$   r$   r$   r%   r     s   


Dr   c               @   s&   e Zd ZdddZdd Zdd ZdS )	DrtGlueFormulaNc             C   s   |s
t  }t|tr$tjj|| _n&t|tjr8|| _ntd||jf t|trft	j
 j|| _n&t|t	jrz|| _ntd||jf || _d S )Nz1Meaning term neither string or expression: %s, %sz.Glue term neither string or expression: %s, %s)r   r   r   r   ZDrtExpressionr   r   r   r   r   r   r    r!   r   r"   )r#   r   r!   r"   r$   r$   r%   r&     s"    

zDrtGlueFormula.__init__c             C   s
   t j|S )N)r   ZDrtVariableExpression)r#   r3   r$   r$   r%   r4     s    z&DrtGlueFormula.make_VariableExpressionc             C   s   t j||S )N)r   ZDrtLambdaExpression)r#   r5   r6   r$   r$   r%   r.     s    z$DrtGlueFormula.make_LambdaExpression)N)rJ   rK   rL   r&   r4   r.   r$   r$   r$   r%   r     s   
r   c               @   s   e Zd Zdd ZdS )DrtGlueDictc             C   s   t S )N)r   )r#   r$   r$   r%   r     s    z#DrtGlueDict.get_GlueFormula_factoryN)rJ   rK   rL   r   r$   r$   r$   r%   r     s   r   c               @   s   e Zd ZdddZdd ZdS )DrtGlueNFc             C   s*   |st jjddd}tj| |||| d S )Nr   r   zdrt_glue.semtype)r   r   rE   r   r&   )r#   r   r   r   r~   r$   r$   r%   r&     s    
zDrtGlue.__init__c             C   s
   t | jS )N)r   r   )r#   r$   r$   r%   r     s    zDrtGlue.get_glue_dict)NFNF)rJ   rK   rL   r&   r   r$   r$   r$   r%   r     s   
r   r(   c       	   
   C   s   ddl m} ddddddg}td	 td d!d"d#d$d%d&d'g}||d}t|dd}xdt|D ]X\}}|| kst| d(kr\td| d|  x"|j|j D ]}t|j  qW td q\W d S ))Nr   )r   zDavid sees MaryzDavid eats a sandwichzevery man chases a dogzevery man believes a dog sleepszJohn gives David a sandwichzJohn chases himselfz"============== DEMO ==============^(David|Mary|John)$NNPN^(sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$VB!^(go|order|vanish|find|approach)$^(a)$r   	^(every)$r   .^(sandwich|man|dog|pizza|unicorn|cat|senator)$r   ^(big|gray|former)$r   ^(him|himself)$PRP)r   F)r   r~   r(   z[[[Example z]]]  rp   )r  r  )r  r  )r  r  )r  r   )r  r   )r  r   )r	  r   )r
  r  r)   )	r   r   r   r   r   re   r   rd   r,   )	Zshow_exampler   Zexamplesr   r   r!   rn   r   r   r$   r$   r%   demo  s4     
r  __main__r)   )r)   )r   	itertoolsr   r^   Znltk.internalsr   Znltk.semr   r   Znltk.sem.logicr   r   r   r	   r
   Znltk.tagr   r   r   r   r   r   r   r   rM   r   r   r   r   r  rJ   r$   r$   r$   r%   <module>	   s4   
u    @"
2
