3
dbG                 @   s  d Z ddlmZ ddlmZmZmZmZmZm	Z	m
Z
mZmZmZmZmZmZ ddlmZmZmZmZmZmZmZmZ G dd deZG dd deeZG d	d
 d
eZG dd deZG dd deZG dd deZG dd deZ G dd deZ!G dd deZ"G dd deZ#G dd deZ$e e e e e gZ%e e e e gZ&e e
 e e gZ'e e
 e e gZ(e e e  gZ)G dd deZ*G dd de*Z+G dd  d e*Z,G d!d" d"e*Z-G d#d$ d$e*Z.G d%d& d&e*Z/e e e" e# e$ gZ0e e e e! gZ1e e e e! gZ2e e e e! gZ3G d'd( d(e*eZ4G d)d* d*e4Z5G d+d, d,e4Z6G d-d. d.e4Z7G d/d0 d0e4Z8d:d6d7Z9e:d8kre9  d9S );a  
Data classes and parser implementations for *incremental* chart
parsers, which use dynamic programming to efficiently parse a text.
A "chart parser" derives parse trees for a text by iteratively adding
"edges" to a "chart".  Each "edge" represents a hypothesis about the tree
structure for a subsequence of the text.  The "chart" is a
"blackboard" for composing and combining these hypotheses.

A parser is "incremental", if it guarantees that for all i, j where i < j,
all edges ending at i are built before any edges ending at j.
This is appealing for, say, speech recognizer hypothesis filtering.

The main parser class is ``EarleyChartParser``, which is a top-down
algorithm, originally formulated by Jay Earley (1970).
    )perf_counter)BottomUpPredictCombineRuleBottomUpPredictRuleCachedTopDownPredictRuleChartChartParserEdgeIEmptyPredictRule"FilteredBottomUpPredictCombineRule!FilteredSingleEdgeFundamentalRuleLeafEdgeLeafInitRuleSingleEdgeFundamentalRuleTopDownInitRule)!FeatureBottomUpPredictCombineRuleFeatureBottomUpPredictRuleFeatureChartFeatureChartParserFeatureEmptyPredictRule FeatureSingleEdgeFundamentalRuleFeatureTopDownInitRuleFeatureTopDownPredictRulec               @   sL   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )IncrementalChartc             C   s(   t dd | j D | _i | _i | _d S )Nc             s   s   | ]
}g V  qd S )N ).0xr   r   6/tmp/pip-build-v9q4h5k9/nltk/nltk/parse/earleychart.py	<genexpr>@   s    z.IncrementalChart.initialize.<locals>.<genexpr>)tuple
_positions
_edgelistsZ_edge_to_cpls_indexes)selfr   r   r   
initialize>   s    zIncrementalChart.initializec             C   s   t | j S )N)list	iteredges)r"   r   r   r   edgesI   s    zIncrementalChart.edgesc             C   s   dd | j D S )Nc             s   s   | ]}|D ]
}|V  q
qd S )Nr   )r   edgelistedger   r   r   r   M   s    z-IncrementalChart.iteredges.<locals>.<genexpr>)r    )r"   r   r   r   r%   L   s    zIncrementalChart.iteredgesc                sr   | j | } i krt|S t j }t|}|| jkrB| j| t fdd|D }t| j| | j|g S )Nc             3   s   | ]} | V  qd S )Nr   )r   key)restrictionsr   r   r   ^   s    z*IncrementalChart.select.<locals>.<genexpr>)r    itersortedkeysr   r!   
_add_indexget)r"   endr*   r'   
restr_keysvalsr   )r*   r   selectO   s    


zIncrementalChart.selectc                s   x"|D ]}t t|std| qW tdd | j D  }| j|< xTt| jD ]F\}}|| }x4|D ], t fdd|D }|j|g j	  qfW qPW d S )NzBad restriction: %sc             s   s   | ]
}i V  qd S )Nr   )r   r   r   r   r   r   h   s    z.IncrementalChart._add_index.<locals>.<genexpr>c             3   s   | ]}t  | V  qd S )N)getattr)r   r)   )r(   r   r   r   n   s    )
hasattrr   
ValueErrorr   r   r!   	enumerater    
setdefaultappend)r"   r1   r)   indexr0   r'   
this_indexr2   r   )r(   r   r.   a   s    

 
zIncrementalChart._add_indexc                sP    j  }xB| jj D ]4\}}t fdd|D }|| j|g j  qW d S )Nc             3   s   | ]}t  | V  qd S )N)r4   )r   r)   )r(   r   r   r   t   s    z:IncrementalChart._register_with_indexes.<locals>.<genexpr>)r0   r!   itemsr   r8   r9   )r"   r(   r0   r1   r:   r2   r   )r(   r   _register_with_indexesq   s    z'IncrementalChart._register_with_indexesc             C   s   | j |j  j| d S )N)r    r0   r9   )r"   r(   r   r   r   _append_edgew   s    zIncrementalChart._append_edgec             C   s   t | j d S )N   )range
num_leaves)r"   r   r   r   r   z   s    zIncrementalChart._positionsN)__name__
__module____qualname__r#   r&   r%   r3   r.   r=   r>   r   r   r   r   r   r   =   s   r   c               @   s$   e Zd Zdd Zdd Zdd ZdS )FeatureIncrementalChartc                st   j | } i krt|S t j }t|}|jkrBj| t fdd|D }tj| | j|g S )Nc             3   s   | ]}j  | V  qd S )N)_get_type_if_possible)r   r)   )r*   r"   r   r   r      s    z1FeatureIncrementalChart.select.<locals>.<genexpr>)r    r+   r,   r-   r   r!   r.   r/   )r"   r0   r*   r'   r1   r2   r   )r*   r"   r   r3      s    


zFeatureIncrementalChart.selectc                s   x"|D ]}t t|std| qW tdd j D  }j|< xVtjD ]H\}}|| }x6|D ]. t fdd|D }|j|g j	  qfW qPW d S )NzBad restriction: %sc             s   s   | ]
}i V  qd S )Nr   )r   r   r   r   r   r      s    z5FeatureIncrementalChart._add_index.<locals>.<genexpr>c             3   s    | ]}j t | V  qd S )N)rF   r4   )r   r)   )r(   r"   r   r   r      s   )
r5   r   r6   r   r   r!   r7   r    r8   r9   )r"   r1   r)   r:   r0   r'   r;   r2   r   )r(   r"   r   r.      s    

 

z"FeatureIncrementalChart._add_indexc                sR    j  }xDjj D ]6\}}t fdd|D }|| j|g j  qW d S )Nc             3   s    | ]}j t | V  qd S )N)rF   r4   )r   r)   )r(   r"   r   r   r      s    zAFeatureIncrementalChart._register_with_indexes.<locals>.<genexpr>)r0   r!   r<   r   r8   r9   )r"   r(   r0   r1   r:   r2   r   )r(   r"   r   r=      s
    z.FeatureIncrementalChart._register_with_indexesN)rB   rC   rD   r3   r.   r=   r   r   r   r   rE   ~   s   rE   c               @   s   e Zd Zdd ZdS )CompleteFundamentalRulec             c   sN   |j  }x@|j||d|j dD ]&}|j|j  }|j|||r |V  q W d S )NT)startr0   is_completelhs)r0   r3   nextsymZmove_dot_forwardZinsert_with_backpointer)r"   chartgrammar	left_edger0   
right_edgenew_edger   r   r   _apply_incomplete   s    z)CompleteFundamentalRule._apply_incompleteN)rB   rC   rD   rQ   r   r   r   r   rG      s   rG   c               @   s   e Zd Ze Zdd ZdS )CompleterRulec             c   s$   t |ts | jj|||E d H  d S )N)
isinstancer   _fundamental_ruleapply)r"   rL   rM   r(   r   r   r   rU      s    
zCompleterRule.applyN)rB   rC   rD   rG   rT   rU   r   r   r   r   rR      s   rR   c               @   s   e Zd Ze Zdd ZdS )ScannerRulec             c   s$   t |tr | jj|||E d H  d S )N)rS   r   rT   rU   )r"   rL   rM   r(   r   r   r   rU      s    
zScannerRule.applyN)rB   rC   rD   rG   rT   rU   r   r   r   r   rV      s   rV   c               @   s   e Zd ZdS )PredictorRuleN)rB   rC   rD   r   r   r   r   rW      s   rW   c               @   s   e Zd Zdd ZdS )FilteredCompleteFundamentalRulec             c   s    |j  r| j|||E d H  d S )N)rI   Z_apply_complete)r"   rL   rM   r(   r   r   r   rU      s    z%FilteredCompleteFundamentalRule.applyN)rB   rC   rD   rU   r   r   r   r   rX      s   rX   c               @   s   e Zd Zdd ZdS )FeatureCompleteFundamentalRulec             c   sH   | j }|j }x4|j||d|j dD ]}|j||||E d H  q&W d S )NT)rH   r0   rI   rJ   )rT   r0   r3   rK   rU   )r"   rL   rM   rN   frr0   rO   r   r   r   rQ      s
    z0FeatureCompleteFundamentalRule._apply_incompleteN)rB   rC   rD   rQ   r   r   r   r   rY      s   rY   c               @   s   e Zd Ze ZdS )FeatureCompleterRuleN)rB   rC   rD   rY   rT   r   r   r   r   r[      s   r[   c               @   s   e Zd Ze ZdS )FeatureScannerRuleN)rB   rC   rD   rY   rT   r   r   r   r   r\      s   r\   c               @   s   e Zd ZdS )FeaturePredictorRuleN)rB   rC   rD   r   r   r   r   r]      s   r]   c               @   s,   e Zd ZdZeddefddZd	ddZdS )
IncrementalChartParsera  
    An *incremental* chart parser implementing Jay Earley's
    parsing algorithm:

    | For each index end in [0, 1, ..., N]:
    |   For each edge such that edge.end = end:
    |     If edge is incomplete and edge.next is not a part of speech:
    |       Apply PredictorRule to edge
    |     If edge is incomplete and edge.next is a part of speech:
    |       Apply ScannerRule to edge
    |     If edge is complete:
    |       Apply CompleterRule to edge
    | Return any complete parses in the chart
    r   2   c             C   sn   || _ || _|| _|| _g | _g | _xD|D ]<}|jdkrF| jj| q*|jdkr^| jj| q*tdq*W dS )a  
        Create a new Earley chart parser, that uses ``grammar`` to
        parse texts.

        :type grammar: CFG
        :param grammar: The grammar used to parse texts.
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            and higher numbers will produce more verbose tracing
            output.
        :type trace_chart_width: int
        :param trace_chart_width: The default total width reserved for
            the chart in trace output.  The remainder of each line will
            be used to display edges.
        :param chart_class: The class that should be used to create
            the charts used by this parser.
        r   r?   z9Incremental inference rules must have NUM_EDGES == 0 or 1N)	_grammar_trace_trace_chart_width_chart_class_axioms_inference_rulesZ	NUM_EDGESr9   r6   )r"   rM   strategytracetrace_chart_widthchart_classruler   r   r   __init__/  s    


zIncrementalChartParser.__init__Nc             C   s@  |d kr| j }| j}t|}| jj| | j|}| j}| j|j d  }|r\t|j	| x.| j
D ]$}t|j||}|||||| qdW | j}	xt|j d D ]}
|dkrtd|
d t|j|
d}xj|r6|j }xV|	D ]N}t|j|||}|||||| x&|D ]}|j |
kr|j| qW qW qW qW |S )Nr?   z
* Processing queue:
)r0   )ra   Z_trace_new_edgesr$   r`   Zcheck_coveragerc   rb   rA   printZpretty_format_leavesrd   rU   re   r@   r3   popr0   r9   )r"   tokensrg   Ztrace_new_edgesrL   rM   Ztrace_edge_widthZaxiomZ	new_edgesZinference_rulesr0   Zagendar(   rj   rP   r   r   r   chart_parseZ  s6    


z"IncrementalChartParser.chart_parse)N)rB   rC   rD   __doc__BU_LC_INCREMENTAL_STRATEGYr   rk   rp   r   r   r   r   r^     s   %r^   c               @   s   e Zd Zdd ZdS )EarleyChartParserc             K   s   t j| |tf| d S )N)r^   rk   EARLEY_STRATEGY)r"   rM   parser_argsr   r   r   rk     s    zEarleyChartParser.__init__N)rB   rC   rD   rk   r   r   r   r   rs   ~  s   rs   c               @   s   e Zd Zdd ZdS )IncrementalTopDownChartParserc             K   s   t j| |tf| d S )N)r^   rk   TD_INCREMENTAL_STRATEGY)r"   rM   ru   r   r   r   rk     s    z&IncrementalTopDownChartParser.__init__N)rB   rC   rD   rk   r   r   r   r   rv     s   rv   c               @   s   e Zd Zdd ZdS )IncrementalBottomUpChartParserc             K   s   t j| |tf| d S )N)r^   rk   BU_INCREMENTAL_STRATEGY)r"   rM   ru   r   r   r   rk     s    z'IncrementalBottomUpChartParser.__init__N)rB   rC   rD   rk   r   r   r   r   rx     s   rx   c               @   s   e Zd Zdd ZdS )(IncrementalBottomUpLeftCornerChartParserc             K   s   t j| |tf| d S )N)r^   rk   rr   )r"   rM   ru   r   r   r   rk     s    z1IncrementalBottomUpLeftCornerChartParser.__init__N)rB   rC   rD   rk   r   r   r   r   rz     s   rz   c               @   s   e Zd Zdd ZdS ) IncrementalLeftCornerChartParserc             K   s&   |j  stdtj| |tf| d S )NzNIncrementalLeftCornerParser only works for grammars without empty productions.)Zis_nonemptyr6   r^   rk   LC_INCREMENTAL_STRATEGY)r"   rM   ru   r   r   r   rk     s
    z)IncrementalLeftCornerChartParser.__init__N)rB   rC   rD   rk   r   r   r   r   r{     s   r{   c               @   s   e Zd ZedefddZdS )FeatureIncrementalChartParser   c             K   s    t j| |f|||d| d S )N)rf   rh   ri   )r^   rk   )r"   rM   rf   rh   ri   ru   r   r   r   rk     s    z&FeatureIncrementalChartParser.__init__N)rB   rC   rD   "BU_LC_INCREMENTAL_FEATURE_STRATEGYrE   rk   r   r   r   r   r}     s   r}   c               @   s   e Zd Zdd ZdS )FeatureEarleyChartParserc             K   s   t j| |tf| d S )N)r}   rk   EARLEY_FEATURE_STRATEGY)r"   rM   ru   r   r   r   rk     s    z!FeatureEarleyChartParser.__init__N)rB   rC   rD   rk   r   r   r   r   r     s   r   c               @   s   e Zd Zdd ZdS )$FeatureIncrementalTopDownChartParserc             K   s   t j| |tf| d S )N)r}   rk   TD_INCREMENTAL_FEATURE_STRATEGY)r"   rM   ru   r   r   r   rk     s    z-FeatureIncrementalTopDownChartParser.__init__N)rB   rC   rD   rk   r   r   r   r   r     s   r   c               @   s   e Zd Zdd ZdS )%FeatureIncrementalBottomUpChartParserc             K   s   t j| |tf| d S )N)r}   rk   BU_INCREMENTAL_FEATURE_STRATEGY)r"   rM   ru   r   r   r   rk     s    z.FeatureIncrementalBottomUpChartParser.__init__N)rB   rC   rD   rk   r   r   r   r   r     s   r   c               @   s   e Zd Zdd ZdS )/FeatureIncrementalBottomUpLeftCornerChartParserc             K   s   t j| |tf| d S )N)r}   rk   r   )r"   rM   ru   r   r   r   rk     s    z8FeatureIncrementalBottomUpLeftCornerChartParser.__init__N)rB   rC   rD   rk   r   r   r   r   r     s   r   TF   $I saw John with a dog with my cookie   c             C   s   ddl }ddl}ddlm} | }	|r6td t|	 td t| |j }
t|
 t  t|	|d}t }|j|
}t	|j
|	j }t | }|rt||kstd|rx$|D ]}t| qW ntdt| | rtd	| dS )
z0
    A demonstration of the Earley parsers.
    r   N)demo_grammarz	* Grammarz* Sentence:)rg   zNot all parses foundz	Nr trees:zTime:)systimenltk.parse.chartr   rm   splitrs   r   rp   r$   parsesrH   lenAssertionError)Zprint_timesZprint_grammarZprint_treesrg   sentZ	numparsesr   r   r   rM   ro   ZearleytrL   r   treer   r   r   demo  s2    


r   __main__N)TFTr   r   r   );rq   r   r   r   r   r   r   r   r   r   r	   r
   r   r   r   r   r   Znltk.parse.featurechartr   r   r   r   r   r   r   r   r   rE   rG   rR   rV   rW   rX   rY   r[   r\   r]   rt   rw   ry   rr   r|   r^   rs   rv   rx   rz   r{   r   r   r   r   r}   r   r   r   r   r   rB   r   r   r   r   <module>   s   <(A6	_     
*
