3
­d‡&  ã               @   s  d Z ddlZddlmZ ddlmZmZmZmZ ddl	m
Z
 ddlmZ ejdƒZejdƒZejd	ƒZejd
ejƒZejdejƒZejdejƒZejdƒZG dd„ dƒZG dd„ dƒZdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zd%dd„Zd&dd „Ze
d!ƒd"d#„ ƒZed$ƒZ dS )'z
CCG Lexicons
é    N)Údefaultdict)ÚCCGVarÚ	DirectionÚFunctionalCategoryÚPrimitiveCategory)Ú
deprecated)Ú
Expressionz([A-Za-z]+)(\[[A-Za-z,]+\])?z"([A-Za-z]+(?:\[[A-Za-z,]+\])?)(.*)z([\\/])([.,]?)([.,]?)(.*)z([\S_]+)\s*(::|[-=]+>)\s*(.+)z([^{}]*[^ {}])\s*(\{[^}]+\})?z\{([^}]+)\}z([^#]*)(?:#.*)?c               @   s:   e Zd ZdZddd„Zdd„ Zdd„ Zd	d
„ Zdd„ ZdS )ÚTokenzÄ
    Class representing a token.

    token => category {semantics}
    e.g. eat => S\var[pl]/var {\x y.eat(x,y)}

    * `token` (string)
    * `categ` (string)
    * `semantics` (Expression)
    Nc             C   s   || _ || _|| _d S )N)Z_tokenÚ_categÚ
_semantics)ÚselfÚtokenÚcategÚ	semantics© r   ú0/tmp/pip-build-v9q4h5k9/nltk/nltk/ccg/lexicon.pyÚ__init__9   s    zToken.__init__c             C   s   | j S )N)r
   )r   r   r   r   r   >   s    zToken.categc             C   s   | j S )N)r   )r   r   r   r   r   A   s    zToken.semanticsc             C   s2   d}| j d k	r dt| j ƒ d }dt| jƒ | S )NÚ z {Ú})r   Ústrr
   )r   Úsemantics_strr   r   r   Ú__str__D   s    
zToken.__str__c             C   s*   t |tƒsdS t| j| jf|jƒ |jƒ ƒS )Né   éÿÿÿÿ)Ú
isinstancer	   Úcmpr
   r   r   r   )r   Úotherr   r   r   Ú__cmp__J   s    
zToken.__cmp__)N)	Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r   r   r   r   r   r	   -   s   

r	   c               @   s0   e Zd ZdZdd„ Zdd„ Zdd„ Zdd	„ Zd
S )Ú
CCGLexiconzâ
    Class representing a lexicon for CCG grammars.

    * `primitives`: The list of primitive categories for the lexicon
    * `families`: Families of categories
    * `entries`: A mapping of words to possible categories
    c             C   s    t |ƒ| _|| _|| _|| _d S )N)r   Ú_startZ_primitivesZ	_familiesÚ_entries)r   ÚstartÚ
primitivesÚfamiliesÚentriesr   r   r   r   Y   s    
zCCGLexicon.__init__c             C   s
   | j | S )z@
        Returns all the possible categories for a word
        )r$   )r   Úwordr   r   r   Ú
categories_   s    zCCGLexicon.categoriesc             C   s   | j S )z;
        Return the target category for the parser
        )r#   )r   r   r   r   r%   e   s    zCCGLexicon.startc             C   sn   d}d}x`t | jƒD ]R}|s$|d }|| d }d}x0| j| D ]"}|sR|d }nd}|d|  }q@W qW |S )zK
        String representation of the lexicon. Used for debugging.
        r   TÚ
z => z | Fz%s)Úsortedr$   )r   ÚstringÚfirstÚidentÚcatr   r   r   r   k   s    
zCCGLexicon.__str__N)r   r   r    r!   r   r*   r%   r   r   r   r   r   r"   P   s
   r"   c             C   s”   | dd… }d}xP|dkr`|j dƒ r`|j dƒrFt|ƒ\}}|| }q||d  }|dd… }qW |j dƒr€|d |dd… fS td|  d ƒ‚dS )	zb
    Separate the contents matching the first set of brackets from the rest of
    the input.
    r   Nú(r   ú)r   zUnmatched bracket in string 'ú')Ú
startswithÚmatchBracketsÚAssertionError)r-   ÚrestZinsideÚpartr   r   r   r5   …   s    


r5   c             C   s    | j dƒrt| ƒS tj| ƒjƒ S )zb
    Separate the string for the next portion of the category from the rest
    of the string
    r1   )r4   r5   ÚNEXTPRIM_REÚmatchÚgroups)r-   r   r   r   ÚnextCategory™   s    
r<   c             C   s   t | d | dd… ƒS )z'
    Parse an application operator
    r   r   N)r   )Úappr   r   r   ÚparseApplication£   s    r>   c             C   s   | r| dd… j dƒS g S )z7
    Parse the subscripts for a primitive category
    r   ú,r   )Úsplit)Zsubscrr   r   r   ÚparseSubscriptsª   s    rA   c             C   s¦   | d dkr.| d dkr.|dkr&t ƒ }||fS | d }||krp|| \}}|dkrX|}n|j||fgƒ}||fS ||kr’t| d ƒ}t||ƒ|fS td| d ƒ‚dS )zƒ
    Parse a primitive category

    If the primitive is the special category 'var', replace it with the
    correct `CCGVar`.
    r   Úvarr   NzString 'z-' is neither a family nor primitive category.)r   Ú
substituterA   r   r6   )Úchunksr&   r'   rB   Úcatstrr0   ZcvarZsubscrsr   r   r   ÚparsePrimitiveCategory³   s"    rF   c       
      C   sä   t | ƒ\}}|jdƒr2t|dd… |||ƒ\}}nttj|ƒjƒ |||ƒ\}}xŒ|dkrÚtj|ƒjƒ }t|dd… ƒ}|d }t |ƒ\}}|jdƒr°t|dd… |||ƒ\}	}nttj|ƒjƒ |||ƒ\}	}t	||	|ƒ}qPW ||fS )z{
    Parse a string representing a category, and returns a tuple with
    (possibly) the CCG variable for the category
    r1   r   r   r   é   r   r   )
r<   r4   ÚaugParseCategoryrF   ÚPRIM_REr:   r;   ÚAPP_REr>   r   )
Úliner&   r'   rB   Z
cat_stringr7   Úresr=   Ú	directionÚargr   r   r   rH   Ñ   s     


rH   Fc             C   s(  t jƒ  g }i }ttƒ}xü| jƒ D ]ð}tj|ƒjƒ d jƒ }|dkrFq"|j	dƒrv|dd„ |dd… jƒ j
dƒD ƒ }q"tj|ƒjƒ \}}}tj|ƒjƒ \}	}
t|	||ƒ\}}|d	krÂ||f||< q"d}|d
krü|
dkrät|d ƒ‚ntjtj|
ƒjƒ d ƒ}|| jt|||ƒƒ q"W t|d |||ƒS )z@
    Convert string representation into a lexicon for CCGs.
    r   r   z:-c             S   s   g | ]}|j ƒ ‘qS r   )Ústrip)Ú.0Zprimr   r   r   ú
<listcomp>  s    zfromstring.<locals>.<listcomp>é   Nr?   z::Tz@ must contain semantics because include_semantics is set to True)r   Zreset_idr   ÚlistÚ
splitlinesÚCOMMENTS_REr:   r;   rO   r4   r@   ÚLEX_REÚRHS_RErH   r6   r   Ú
fromstringÚSEMANTICS_REÚappendr	   r"   )Úlex_strZinclude_semanticsr&   r'   r(   rK   r/   ÚsepÚrhsrE   r   r0   rB   r   r   r   r   rX   ñ   s4    
$
rX   zUse fromstring() instead.c             C   s   t | ƒS )N)rX   )r[   r   r   r   ÚparseLexicon"  s    r^   a¡  
    # Rather minimal lexicon based on the openccg `tinytiny' grammar.
    # Only incorporates a subset of the morphological subcategories, however.
    :- S,NP,N                    # Primitive categories
    Det :: NP/N                  # Determiners
    Pro :: NP
    IntransVsg :: S\NP[sg]    # Tensed intransitive verbs (singular)
    IntransVpl :: S\NP[pl]    # Plural
    TransVsg :: S\NP[sg]/NP   # Tensed transitive verbs (singular)
    TransVpl :: S\NP[pl]/NP   # Plural

    the => NP[sg]/N[sg]
    the => NP[pl]/N[pl]

    I => Pro
    me => Pro
    we => Pro
    us => Pro

    book => N[sg]
    books => N[pl]

    peach => N[sg]
    peaches => N[pl]

    policeman => N[sg]
    policemen => N[pl]

    boy => N[sg]
    boys => N[pl]

    sleep => IntransVsg
    sleep => IntransVpl

    eat => IntransVpl
    eat => TransVpl
    eats => IntransVsg
    eats => TransVsg

    see => TransVpl
    sees => TransVsg
    )N)F)!r!   ÚreÚcollectionsr   Znltk.ccg.apir   r   r   r   Znltk.internalsr   Znltk.sem.logicr   ÚcompilerI   r9   rJ   ÚUNICODErV   rW   rY   rU   r	   r"   r5   r<   r>   rA   rF   rH   rX   r^   Zopenccg_tinytinyr   r   r   r   Ú<module>	   s0   



#5
	
 
1*