3
d                 @   s   d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
mZmZmZmZmZ ddlmZmZ ddlmZ ddlmZmZ ddlmZ ddlmZmZ dd	lmZ dd
lm Z  ddl!m"Z" G dd dZ#dd Z$e%dkre$  dgZ&dS )zl
A graphical tool for exploring the regular expression based chunk
parser ``nltk.chunk.RegexpChunkParser``.
    N)
ButtonCanvasCheckbuttonFrameIntVarLabelMenu	ScrollbarTextTk)askopenfilenameasksaveasfilename)Font)
ChunkScoreRegexpChunkParser)RegexpChunkRule)	conll2000treebank_chunk)ShowText)Tree)in_idlec            .   @   s  e Zd ZdZdddddddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/-ZddddgZd>ed?d@fdAedBd@fdCedDdEfdFedGdHfdIedGdHfdJedKdKdLfdMedNdOdLfdPedQd@fdRedSd@fdTedUd@fg
ZdVZdWZ	dXZ
dYZdZZed[d\d]d]dVd^d_d`daZedWdWd]d]dbdVd^d_d`dc	ZedddedfdfdVd^d_d`ddh	Zedid^d_djZedkddlZedmd_d_dndoZedfdfdVd^d_dpdqdrZedmdmdmdsZdtZd]Zed]dEZedtdEZduZdvdw Zdd|d}Zd~d Zdd Zdd Zdd ZdZ dZ!dd Z"dZ#dd Z$dd Z%dd Z&dZ'dd Z(dd Z)dd Z*dd Z+dd Z,dd Z-dd Z.dd Z/dd Z0dddZ1dd Z2dd Z3dd Z4dd Z5dddZ6dd Z7dd Z8dd Z9dZ:dddZ;dddZ<dddZ=dd Z>dddZ?dddZ@dd ZAdyS )RegexpChunkAppz
    A graphical tool for exploring the regular expression based chunk
    parser ``nltk.chunk.RegexpChunkParser``.

    See ``HELP`` for instructional text.
    zCoordinating conjunctionzPossessive pronounzCardinal numberZAdverbZ
DeterminerzAdverb, comparativezExistential therezAdverb, superlativezForeign wordZParticleZ	AdjectiveZtozAdjective, comparativeZInterjectionzAdjective, superlativezVerb, base formzList item markerzVerb, past tenseZModalzNoun, pluralzNoun, singular or maspszVerb, past participlezVerb,3rd ps. sing. presentzProper noun, singularzProper noun pluralzwh-determinerZPredeterminerz
wh-pronounzPossessive endingzPossessive wh-pronounzPersonal pronounz	wh-adverbzopen parenthesiszclose parenthesisz
open quotecommazclose quoteZperiodzpound sign (currency marker)zdollar sign (currency marker)zPreposition/subord. conjunctionz#Symbol (mathematical or scientific)zVerb, gerund/present participlezVerb, non-3rd ps. sing. presentZcolon)-CCzPRP$ZCDZRBZDTZRBRZEXZRBSZFWZRPZJJZTOZJJRZUHZJJSZVBZLSZVBDZMDZNNSZNNZVBNZVBZZNNPZNNPSZWDTPDTZWPPOSzWP$ZPRPZWRB()z``,z''.#$INZSYMZVBGZVBP:Help20-  Welcome to the regular expression chunk-parser grammar editor.  You can use this editor to develop and test chunk parser grammars based on NLTK's RegexpChunkParser class.

Use this box ('Help') to learn more about the editor; click on the tabs for help on specific topics:<indent>
Rules: grammar rule types
Regexps: regular expression syntax
Tags: part of speech tags
</indent>
Use the upper-left box ('Grammar') to edit your grammar.  Each line of your grammar specifies a single 'rule', which performs an action such as creating a chunk or merging two chunks.

The lower-left box ('Development Set') runs your grammar on the development set, and displays the results.  Your grammar's chunks are <highlight>highlighted</highlight>, and the correct (gold standard) chunks are <underline>underlined</underline>.  If they match, they are displayed in <green>green</green>; otherwise, they are displayed in <red>red</red>.  The box displays a single sentence from the development set at a time; use the scrollbar or the next/previous buttons view additional sentences.

The lower-right box ('Evaluation') tracks the performance of your grammar on the development set.  The 'precision' axis indicates how many of your grammar's chunks are correct; and the 'recall' axis indicates how many of the gold standard chunks your system generated.  Typically, you should try to design a grammar that scores high on both metrics.  The exact precision and recall of the current grammar, as well as their harmonic mean (the 'f-score'), are displayed in the status bar at the bottom of the window.Rules10  <h1>{...regexp...}</h1><indent>
Chunk rule: creates new chunks from words matching regexp.</indent>

<h1>}...regexp...{</h1><indent>
Strip rule: removes words matching regexp from existing chunks.</indent>

<h1>...regexp1...}{...regexp2...</h1><indent>
Split rule: splits chunks that match regexp1 followed by regexp2 in two.</indent>

<h1>...regexp...{}...regexp...</h1><indent>
Merge rule: joins consecutive chunks that match regexp1 and regexp2</indent>
Regexps10 60Z  <h1>Pattern		Matches...</h1>
<hangindent>	<<var>T</var>>	a word with tag <var>T</var> (where <var>T</var> may be a regexp).
	<var>x</var>?	an optional <var>x</var>
	<var>x</var>+	a sequence of 1 or more <var>x</var>'s
	<var>x</var>*	a sequence of 0 or more <var>x</var>'s
	<var>x</var>|<var>y</var>	<var>x</var> or <var>y</var>
	.	matches any character
	(<var>x</var>)	Treats <var>x</var> as a group
	# <var>x...</var>	Treats <var>x...</var> (to the end of the line) as a comment
	\<var>C</var>	matches character <var>C</var> (useful when <var>C</var> is a special character like + or #)
</hangindent>
<h1>Examples:</h1>
<hangindent>	<regexp><NN></regexp>
		Matches <match>"cow/NN"</match>
		Matches <match>"green/NN"</match>
	<regexp><VB.*></regexp>
		Matches <match>"eating/VBG"</match>
		Matches <match>"ate/VBD"</match>
	<regexp><IN><DT><NN></regexp>
		Matches <match>"on/IN the/DT car/NN"</match>
	<regexp><RB>?<VBD></regexp>
		Matches <match>"ran/VBD"</match>
		Matches <match>"slowly/RB ate/VBD"</match>
\t<regexp><\#><CD> # This is a comment...</regexp>\n		Matches <match>"#/# 100/CD"</match>
</hangindent>Tagsz<h1>Part of Speech Tags:</h1>
z<hangindent>z
<<TAGSET>>z</hangindent>
Zredz#a00)
foregroundZgreenz#080Z	highlightz#ddd)
background	underlineT)r0   h1indent   )lmargin1lmargin2
hangindentr   <   varz#88fregexpz#ba7matchz#6a6      g?g{Gz?g{Gz?(      z#efeZgroove   word)widthheightr/   highlightbackgroundhighlightthicknessreliefborderwrapz#555)	rA   rB   r/   rC   r.   rD   rE   rF   rG   F   
   z#eef   )	rA   rB   r/   rC   rD   rE   rF   rG   tabsz#9bb)r/   rE   rF   	helvetica)familysizez#777   )r/   padxpadyrF   i,  i  )r/   rC   rD   rE   rF   rA   rB   )r/   ZactivebackgroundrC   z#aba   c             C   sD   t jdd|}t jdd|}t jdd|}|j }t jdd|}|S )	Nz((\\.|[^#])*)(#.*)?z\1z + z\n\s+z\nz	([^\\])\$z\1\\$)resubstrip)selfgrammar rY   8/tmp/pip-build-v9q4h5k9/nltk/nltk/app/chunkparser_app.pynormalize_grammar4  s    z RegexpChunkApp.normalize_grammarr   N NPc             C   s`  || _ |dkr| j}|| _|dkrT|dkr6tjd}n|dkrHtj }ntd| d| _|| _d| _	d| _
|| _|| _d| _d| _g | _d| _d| _d| _d| _t|d| _t  }| _|jd	 |jd
 |jd| j t|| _| jjd | j| | j | | j!| | j"| | j#j$  |rJ| j#j%d|d  | j#j&dd | j'd | j(  dS )a  
        :param devset_name: The name of the development set; used for
            display & for save files.  If either the name 'treebank'
            or the name 'conll2000' is used, and devset is None, then
            devset will be set automatically.
        :param devset: A list of chunked sentences
        :param grammar: The initial grammar to display.
        :param tagset: Dictionary from tags to string descriptions, used
            for the help page.  Defaults to ``self.TAGSET``.
        Nr   z	train.txtZtreebankzUnknown development set %sr   r;   )chunk_labelz+50+50zRegexp Chunk Parser Appz<Control-q>d   end
insertz1.0))_chunk_labelTAGSETtagsetr   Zchunked_sentsr   
ValueErrorchunkerrX   normalized_grammargrammar_changeddevsetdevset_namedevset_index_last_keypress_history_history_index_eval_grammar_eval_normalized_grammar_eval_indexr   _eval_scorer   topZgeometrytitlebinddestroyr   _devset_sizeset_init_fonts_init_widgets_init_bindings_init_menubar
grammarboxZfocusrb   mark_setshow_devsetupdate)rW   rl   rk   rX   r^   rf   ru   rY   rY   rZ   __init__?  sP    









zRegexpChunkApp.__init__c                s   |j d j |j d j |j d j |j d j |j d fdd |j d fd	d  jj d j  jj d j  jj d j  jj d
 j d S )Nz<Control-n>z<Control-p>z<Control-t>z
<KeyPress>z<Control-s>c                s    j  S )N)save_grammar)e)rW   rY   rZ   <lambda>  s    z/RegexpChunkApp._init_bindings.<locals>.<lambda>z<Control-o>c                s    j  S )N)load_grammar)r   )rW   rY   rZ   r     s    z<Configure>)rw   _devset_next_devset_prevtoggle_show_tracer   r   evalbox
_eval_plot)rW   ru   rY   )rW   rZ   r}     s    zRegexpChunkApp._init_bindingsc             C   sR   t || _| jjd td| jj  d| _tdt| jj d d  d| _d S )Nr3   rL   )rM   rN      )r   _sizerz   r   get_fontint
_smallfont)rW   ru   rY   rY   rZ   r{     s
    
zRegexpChunkApp._init_fontsc             C   s  t |}t |dd}|jdd| jd |jddd| jd |jddd	| jd |jd
d| jd |jdd| jdd |jdd|d t |dd}|jd| j	dd| j
d |jd| j	dd| j
d |jd| j	dd| j
d |jd| j	dd| j
d |jd| j	dd| j
d |jdd|d t |dd}|jd| jd| jd  |jd!| jd"| jd  |jd#| jd$| jd  |jd%| jd&| jd  |jd'd|d t |dd}|jd(d| jd |jd)d|d |j|d* d S )+Nr   )ZtearoffzReset Application)labelr0   commandzSave Current GrammarzCtrl-s)r   r0   acceleratorr   zLoad GrammarzCtrl-ozSave Grammar History   ZExitr;   zCtrl-q)r   r0   r   r   ZFile)r   r0   menuZTinyrI   )r   variabler0   valuer   ZSmall   ZMediumr3   ZLarge   ZHuge"   ZViewz50 sentences2   )r   r   r   r   z100 sentencesr_   z200 sentences   z500 sentencesi  zDevelopment-SetZAboutr$   )r   )r   Zadd_commandresetr   r   save_historyrx   Zadd_cascadeZadd_radiobuttonr   resizery   set_devset_sizeaboutconfig)rW   parentZmenubarZfilemenuZviewmenuZ
devsetmenuZhelpmenurY   rY   rZ   r~     s    










zRegexpChunkApp._init_menubarc             G   s   | j r| j  n| j  dS )Nbreak)_showing_tracer   
show_trace)rW   r   rY   rY   rZ   r   #  s    
z RegexpChunkApp.toggle_show_trace   Fc          
   O   st  |j d| jj }|j d| jj }| jjd | jjd|d d dddd	}| jj|d d
 |d  }}| jj||| d  |d dddd}d| jj|d d  }}	| jd }
| jj| jj	dd|d d|
|
d | jj| jj	d|	d dd|
|
d | j
j  rt| jdkrd }}d }}xbtdtt| j| jd D ]B}| j|  \}}}}t||}t||}t||}t||}qHW t|d d}t|d d}t|d d}t|d d}nd }}d }}xtdD ]}||| |d | ||    }|	|	| |d | ||    }||  k o:|k n  rX| jj||||	dd ||  k ol|	k n  r| jj||||dd qW | jj||||	 | jj||	||	 | jj|d |	dddd|  d	 | jj|d |dddd|  d	 | jj||	d dddd|  d	 | jj||	d dddd|  d	 d  }}x(t| jD ]\}\}}}}||| || ||    }|	|	| || ||    }|| jkr| jj|d |d |d |d d d!d d"|d  d#|d   d$|d   | jd%< n0| jj| jj|d |d |d |d d&d'd |d k	r`| jj  r`| jj| jj||||d'd || }}qRW d S )(NrA   rB   allrI   r?   leftwZ	Precision)justifyanchortextr   sZRecallcenter)r   r   r   r;   r/   r   i  )filloutlineg{Gz?   g      $@z#888)r   rO   rightsez%d%%r_   nenwz#0f0z#000zPrecision: %.2f%%	zRecall: %.2f%%	zF-score: %.2f%%r   z#afaz#8c8)r   r   Zwinfo_widthZwinfo_heightdeleteZcreate_textZbbox_EVALBOX_PARAMSlowerZcreate_rectangle
_autoscalelenro   rangemin_SCALE_NmaxZcreate_line	enumeraterp   Zcreate_ovalstatus_eval_lines)rW   r   r   rA   rB   tagr   r   ru   ZbotbgZmax_precisionZ
max_recallZmin_precisionZ
min_recallirX   	precisionrecallZfmeasurexyZprev_xZprev_y_fscorerY   rY   rZ   r   -  s    
"


 ",$zRegexpChunkApp._eval_plotc       	      C   s,  | j d krd S | jd kr"d| _d S tj }tj | j | jk rj| j| jkrjd| _| j jt	| j
d | jS | j| jkrxb| jD ]X\}}}}| j| j|kr~| jj||||f t| jd | _| j  d| _d | _d S q~W d| _t| jd| _| j| _| j| _| jj dkrd| _d S xJ| j| jt| j| j | jj  D ]"}| j|j }| jj || q>W |  j| j7  _| j| jj kr| jj| j| jj! | jj" | jj# f t| jd | _| j  d| _d | _nTd| j | jj  }d	| | j$d
< d| _| j%tj |  | j jt	| j
d | j d S )NFTi  r;   r   )r^   r\   r_   z$Evaluating on Development Set (%d%%)r   )&ru   rh   _eval_demon_runningtimern   _EVAL_DELAYri   rr   afterr   
_EVAL_FREQ_eval_demonro   r[   appendr   rp   r   rs   r   rd   rt   rX   rq   rV   rk   r   _EVAL_CHUNKry   r   _chunkparseleavesZscorer   r   Z	f_measurer   _adaptively_modify_eval_chunk)	rW   t0gprfZgoldguessprogressrY   rY   rZ   r     s^    

zRegexpChunkApp._eval_demonc             C   s   || j krD| jdkrDt| jd tt| j| j |  | jd | _n8|| jk r|t| jd tt| j| j|  | jd | _dS )z
        Modify _EVAL_CHUNK to try to keep the amount of time that the
        eval demon takes between _EVAL_DEMON_MIN and _EVAL_DEMON_MAX.

        :param t: The amount of time that the eval demon took.
        r   r;   rI   N)_EVAL_DEMON_MAXr   r   r   r   _EVAL_DEMON_MIN)rW   trY   rY   rZ   r     s    
z,RegexpChunkApp._adaptively_modify_eval_chunkc                s  t |f j}|jddd |jddd |jddd |jddd t|fd ji j _t| jd	d
 jd d _	 j	j
dddd  jj
dddd t| jjd}|j
dddd  jj|jd  jd }t ||d}|j
dddd t|fd jd jjdd t|fd jd jjdd t|fd ji j _ jj
dddd i  _ jd }t ||d}|j
dddd xt jD ]z\}\}}	}
t|| jd}|j
|d ddd |jd|f fdd	 | j|< t |d j|dj
|d d dd  qW  j jd d  j jd!  jjd"d#d$ x( jD ]\}} jjd%| f| qRW  j jd d  t| jjd} jj|jd |j
dddd t | jd d}t|fd ji j  _! j!jd#d&d' t| jd(d) j d d* _" j"j
dddd |j
dddd t| j#d _$ j$j
dddd t| j!j%d+d, _& j&j j!d-<  j&jd.d/d0  jd }t ||d}|j
dd1dd t|fd2 j'd jjdd t|fd3 j(d jjdd t|fd4 j)d5d6 j _* j*jd)d t|fd7 j+d j _, j,jd)d t-|f j. _/t| jd8d) j.d d*}|j
dddd  j/j
ddddd9  jd }t ||d}|j
dd1dd t0 j1 _2 j2jd: t3|f j2 j4d;d< jjdd t0 j1 _5 j5jd: t3|f j5 j4d=d< jjdd t|fd>d?i jjd)d t|fd ji j6 _7 j7j
dd@dAddddB d5 jdC< d5 j!dC<  jd }t |dDd|dj
ddd  t |ddD|dj
ddd  t |dEd|dj
ddFd  |jd&d#dG  j!jdHdIdJdK  j!jdLdJdMdN  j!jdOdPd  j!jdQdRdSdT  j!jdUdVdSdW  j!jdXdMdY  jjdXdZd  jjd[d\dY  jjd]d^dY  jjd_d`dY  jjdaddbdc d S )dNr      )ZweightrO   r?   r;   r   fontzGrammar:Zblackr/   )r   r   Zhighlightcolorr/   ZSW)columnrowstickyZNEWS)r   ZNWS)Zyscrollcommand)r/   ZEWzPrev Grammar)r   r   r   )sidezNext Grammar)r   r   Sz<ButtonPress>c                s
    j |S )N)	show_help)r   tab)rW   rY   rZ   r   N  s    z.RegexpChunkApp._init_widgets.<locals>.<lambda>)rB   rA   r/   )r   r   )r   elideT)r   ztag-%sZboth)expandr   zDevelopment Set:r   )r   r   r   r/   Zhoriz)r   ZorientZxscrollcommandZbottomr   )r   r      zPrev Example (Ctrl-p)zNext Example (Ctrl-n)zShow exampledisabled)r   r   statez
Show tracezEvaluation:)r   r   r   
columnspanFZZoom)r   r   r   ZLinesr   ZHistory	   ZNEW)r   r   r   rP   rQ   r   r   rI   rR      )r   r   ztrue-posz#afaTrue)r/   r0   z	false-negz#800)r0   r.   z	false-posz#faatracez#666none)r.   rG   
wrapindentrJ   )r5   rG   error)r.   z#feccommentz#840anglez#00fbracez#0a0r6   r=   )r4   r5   )8r   _FRAME_PARAMSZgrid_columnconfigureZgrid_rowconfigurer
   r   _GRAMMARBOX_PARAMSr   r   grammarlabelZgridr	   Zyviewr   rz   r   _history_prev_BUTTON_PARAMSpack_history_nextr   _HELPBOX_PARAMShelpboxhelptabsr   HELPrw   _HELPTAB_SPACER	configureZ
tag_configHELP_AUTOTAGr   _DEVSETBOX_PARAMS	devsetboxdevsetlabel_devset_scrolldevset_scrollZxviewdevset_xscrollr   r   r   devset_buttonr   trace_buttonr   r   r   r   ru   r   r   r   r   _STATUS_PARAMSr   )rW   ru   Zframe0Zgrammar_scrollbarr   Zframe3Zhelptab_framer   r   tabstopsr   r   r   paramsZhelp_scrollbarZframe4Zframe1Zframe2rY   )rW   rZ   r|     s&   













zRegexpChunkApp._init_widgetsc                sx  d| _ d| jd< d| jd< d| jd< | jjdd d| jd | jj f | jd	< | j	d kr|| jj
dd
 | jjddd d S | j| j }| j	j }d}dg x6t|j D ]&\}\}}|d| 7 } jt| qW  fddtt|d D | _dd tt|d D | _x,tt|d D ]}|dkrT| jj
dd | jjddd n*| jj
dd||d    | jjddd | jj
d|d  | jjddd t|d | }	| j|j }
| j|}| j|
}x"|j|D ]}| j||d qW x || D ]}| j||d q W x || D ]}| j||d q"W q$W | jj
dd | jjddd | jjd| jjdd d S )NTr   r   normalz1.0r`   zDevelopment Set (%d/%d)r;   r   z#Trace: waiting for a valid grammar.r   	z%s c                s,   i | ]$}t t D ]} | ||fqqS rY   )r   r   ).0r   j)charnumrY   rZ   
<dictcomp>  s   z-RegexpChunkApp.show_trace.<locals>.<dictcomp>c             S   s   i | ]}|d  d  |qS )r?   rY   )r  r   rY   rY   rZ   r    s    r   zStart:
r   zend -2c linestartzend -2cz
Apply %s:
ra   r   ztrue-posz	false-negz	false-posz
Finished.
r_   g333333?)r   r  r  r  r   rm   ry   r   r  rh   rb   tag_addrk   rulesr   r   r   r   r   r  linenumr   r   _chunksintersection_color_chunkru   r   r  rz   )rW   r   	gold_treer  Ztagseqwordnumr@   posr   rh   	test_treegold_chunkstest_chunkschunkrY   )r  rZ   r     sT    








zRegexpChunkApp.show_tracec       
   	   C   sv  d| j d< | j jdd xL| jD ]@\}}}||krP|jddjdd tt| jj d	d
 dD }| j	| j
f | j | j j
|d | j jd|d  d}x| jD ]\}}d| d| d}xtj||D ]t}	| j jd||	jd ||	jd  | j jd| ||	jd ||	jd  | j jd||	jd ||	jd  qW qW q"| j	| j
f | j q"W d| j d< d S )Nr  r   z1.0r`   z
<<TAGSET>>ra   c             s   s   | ]}d | V  qdS )z	%s	%sNrY   )r  itemrY   rY   rZ   	<genexpr>  s   z+RegexpChunkApp.show_help.<locals>.<genexpr>c             S   s(   t jd| d rd| d fp&d| d fS )Nz\w+r   r;   )rT   r:   )Zt_wrY   rY   rZ   r   "  s   z*RegexpChunkApp.show_help.<locals>.<lambda>)key)rK   r3   z1.0 + %d charsz(?s)(<z
>)(.*?)(</z>)r   r;   ztag-%sr?   rO   r   z



















)r  r   r  replacejoinsortedlistrf   itemsr  r   _HELPTAB_FG_PARAMSrb   r	  rT   finditerr  startr`   _HELPTAB_BG_PARAMS)
rW   r   namer  r   Cr   r  patternmrY   rY   rZ   r     s0    

$".zRegexpChunkApp.show_helpc             G   s   | j | jd  dS )Nr;   r   )_view_historyrp   )rW   r   rY   rY   rZ   r   9  s    zRegexpChunkApp._history_prevc             G   s   | j | jd  dS )Nr;   r   )r8  rp   )rW   r   rY   rY   rZ   r  =  s    zRegexpChunkApp._history_nextc             C   s.  t dtt| jd |}| js$d S || jkr2d S d| jd< | jjdd | jjd| j| d  | jjdd || _| j	| j| d  | j
| j| d | _| jrdd	 | jjd
D }ng }t|| _| j  | j  | jr| j  | jt| jd k r dj| jd t| j| jd< n
d| jd< d S )Nr   r;   r  r   z1.0r`   rb   c             S   s   g | ]}t j|qS rY   )r   
fromstring)r  linerY   rY   rZ   
<listcomp>U  s   z0RegexpChunkApp._view_history.<locals>.<listcomp>ra   zGrammar {}/{}:r   zGrammar:)r   r   r   ro   rp   r   r   rb   r   _syntax_highlight_grammarr[   ri   splitr   rh   r   _highlight_devsetr   r   formatr   )rW   indexr  rY   rY   rZ   r8  A  s4    


zRegexpChunkApp._view_historyc             G   s   | j ddd dS )Nscrollr;   pager   )r  )rW   r   rY   rY   rZ   r   j  s    zRegexpChunkApp._devset_nextc             G   s   | j ddd dS )NrA  r;   rB  r   rc   )r  )rW   r   rY   rY   rZ   r   n  s    zRegexpChunkApp._devset_prevc             G   s"   | j d krd S | j j  d | _ d S )N)ru   rx   )rW   r   rY   rY   rZ   rx   r  s    

zRegexpChunkApp.destroyc             G   s   d}| j }|dkr:|d jdr:| j| jt|d   nv|dkrn|d jdrn| j| j|t|d    nB|dkr| jtt|d | jj   ndstd| d| |r| j	  d S )	Nr;   rA  unitr   rB  Zmovetozbad scroll command rS   )
r   
startswithr   rm   r   floatry   r   AssertionErrorr   )rW   r   argsNZshowing_tracerY   rY   rZ   r  x  s    "zRegexpChunkApp._devset_scrollc             C   s  |d kr| j }ttd|| jj d }|| j kr>| j r>d S || _ d| _d| jd< d| jd< d| jd< d| jd< | jj	d	d
 d| j d | jj f | j
d< | j| j | j d  }i | _ddi| _xt|D ]\}}d}xXt|j D ]H\}\}}t|| j||f< || d| d7 }t|| j||d f< qW | jjd
|d d d  qW | jd k	rf| j  d| jd< | j | jj  }	| j d | jj  }
| jj|	|
 d S )Nr   r;   Fr  r   r   r@   rG   z1.0r`   zDevelopment Set (%d/%d)r   r\   /rS   z

r?   rc   )rm   r   r   ry   r   r   r  r  r  r   r  rk   r  r  r   r   r   rb   rh   r>  r  rz   )rW   r@  samplesentnumsentZlinestrr"  r@   r#  firstlastrY   rY   rZ   r     s<    





zRegexpChunkApp.show_devsetc             C   s`   t  }d}xP|D ]H}t|trP|j | jkrB|j||t| f |t|7 }q|d7 }qW |S )Nr   r;   )rz   
isinstancer   r   rd   addr   )rW   treechunksr"  childrY   rY   rZ   r    s    

zRegexpChunkApp._chunksc             C   s^  | j d krd S | jjddd | jjddd | jjddd | jjddd xt|jdD ]\}}|j srq`tjd|}d }|j	d	r|j
d	}d
|d |j
d	f }d
|d |jd	f }| jjd|| xtjd|D ]r}|d k	r|j
 |krP d
|d |j
 f }d
|d |j f }|j	 dkrB| jjd|| q| jjd|| qW q`W d S )Nr   z1.0r`   r   r   r6   ra   z(\\.|[^#])*(#.*)?r?   z%d.%dr;   z[<>{}]z<>)ru   r   
tag_remover  r   r=  rV   rT   r:   groupr2  r`   r1  )rW   rX   linenor:  r7  comment_startr   r   rY   rY   rZ   r<    s0    


z(RegexpChunkApp._syntax_highlight_grammarc             C   s   | j d krd S | jjddd g | _xt|jdD ]t\}}tjdd|}|j }|r4yt	j
| W q4 tk
r } z&| jjdd|d  d	|d   W Y d d }~X q4X q4W d
| jd< d S )Nr   z1.0r`   ra   z((\\.|[^#])*)(#.*)?z\1z%s.0r;   z%s.0 lineendr\   r   )ru   r   rT  Z_grammarcheck_errsr   r=  rT   rU   rV   r   r9  rg   r  r   )rW   rX   rV  r:  r   rY   rY   rZ   _grammarcheck  s    
0zRegexpChunkApp._grammarcheckc             G   s  |rt j  | _| jjdd | _}| j|}|| jkr:d S || _| jt| j	d k r^d| j
d< | j| y"|rdd |jdD }ng }W n0 tk
r } z| j| d | _d S d }~X nX t|| _| jjd	dd t j  | _| jr| j  n| j  | js| j  d S )
Nz1.0r`   r;   zGrammar:r   c             S   s   g | ]}t j|qS rY   )r   r9  )r  r:  rY   rY   rZ   r;    s   z)RegexpChunkApp.update.<locals>.<listcomp>ra   r   )r   rn   r   r   rX   r[   ri   rp   r   ro   r   r<  r=  rg   rX  rh   r   rT  rj   r   r   r>  r   r   )rW   eventrX   ri   r  r   rY   rY   rZ   r     s6    








zRegexpChunkApp.updatec             C   s   |d kr| j | j| jd  }| jjddd | jjddd | jjddd xt|D ]\}}| j|j }| j|}| j|}x |j|D ]}| j	||d qW x|| D ]}| j	||d qW x|| D ]}| j	||d qW qXW d S )Nr;   ztrue-posz1.0r`   z	false-negz	false-pos)
rk   rm   r  rT  r   r   r   r  r  r   )rW   rJ  rK  r!  r$  r%  r&  r'  rY   rY   rZ   r>    s    

z RegexpChunkApp._highlight_devsetc             C   sF   y| j j|S  ttfk
r@ } z| jjddd |S d }~X nX d S )Nr   z1.0r`   )rh   parserg   
IndexErrorr   r  )rW   wordsr   rY   rY   rZ   r   5  s
    zRegexpChunkApp._chunkparsec             C   sT   |\}}| j j|| j|  d| j||f  | j|  d| j||f d   d S )Nr   r;   )r  r  r  r  )rW   rK  r'  r   r2  r`   rY   rY   rZ   r   @  s
    zRegexpChunkApp._color_chunkc             C   sH   d | _ d | _d | _d| _g | _d| _| jjdd | jd | j	  d S )Nr   z1.0r`   )
rh   rX   ri   rj   ro   rp   r   r   r   r   )rW   rY   rY   rZ   r   H  s    
zRegexpChunkApp.resetz# Regexp Chunk Parsing Grammar
# Saved %(date)s
#
# Development set: %(devset)s
#   Precision: %(precision)s
#   Recall:    %(recall)s
#   F-score:   %(fscore)s

%(grammar)s
c             C   s   |s ddg}t |dd}|s d S | jrd| j| j| jd d krddd	 | jd dd  D \}}}n$| jd kr|d
 } }}nd } }}t|d2}|j| jtt	j
 | j|||| jj d  W d Q R X d S )NChunk Gramamr.chunk	All files*)	filetypesdefaultextensionr;   r   c             s   s   | ]}d d|  V  qdS )z%.2f%%r_   NrY   )r  vrY   rY   rZ   r)  k  s    z.RegexpChunkApp.save_grammar.<locals>.<genexpr>zGrammar not well formedzNot finished evaluation yetr   )daterk   r   r   r   rX   )r]  r^  )r_  r`  rc   rc   )r   ro   ri   r[   rh   openwriteSAVE_GRAMMAR_TEMPLATEdictr   ctimerl   rX   rV   )rW   filenameftypesr   r   r   outfilerY   rY   rZ   r   a  s*    $
zRegexpChunkApp.save_grammarc             C   s~   |s d
dg}t |dd}|s d S | jjdd | j  t|}|j }W d Q R X tjdd	|j }| jj	d| | j  d S )NChunk Gramamr.chunk	All filesr`  )ra  rb  z1.0r`   z2^\# Regexp Chunk Parsing Grammar[\s\S]*F-score:.*
r\   )rm  rn  )ro  r`  )
r   r   r   r   re  readrT   rU   lstriprb   )rW   rj  rk  infilerX   rY   rY   rZ   r     s    
zRegexpChunkApp.load_grammarc       
      C   sH  |s ddg}t |dd}|s d S t|d}|jd |jdtj   |jd	| j  xvt| jD ]h\}\}}}}d
|d t| j|d |d |d f }	|jd|	  |jdj	dd |j
 j D  qfW | jo| j| j| jd d ks:| jd kr|jd n
|jd |jdj	dd | jj
 j D  W d Q R X d S )NChunk Gramamr History.txt	All filesr`  )ra  rb  r   z'# Regexp Chunk Parsing Grammar History
z# Saved %s
z# Development set: %s
z>Grammar %d/%d (precision=%.2f%%, recall=%.2f%%, fscore=%.2f%%)r;   r_   z
%s
r\   c             s   s   | ]}d | V  qdS )z  %s
NrY   )r  r:  rY   rY   rZ   r)    s    z.RegexpChunkApp.save_history.<locals>.<genexpr>r   z#
Current Grammar (not well-formed)
z!
Current Grammar (not evaluated)
c             s   s   | ]}d | V  qdS )z  %s
NrY   )r  r:  rY   rY   rZ   r)    s    )rs  rt  )ru  r`  rc   )r   re  rf  r   ri  rl   r   ro   r   r,  rV   r=  ri   r[   rh   rX   )
rW   rj  rk  rl  r   r   r   r   r   hdrrY   rY   rZ   r     s,    
&&
zRegexpChunkApp.save_historyc          
   G   sH   d}d}y ddl m} |||dj  W n   t| j|| Y nX d S )Nz%NLTK RegExp Chunk Parser Application
zWritten by Edward Loperz2About: Regular Expression Chunk Parser Applicationr   )Message)messagerv   z<NLTK RegExp Chunk Parser Application
Written by Edward Loper)Ztkinter.messageboxrw  showr   ru   )rW   r   ZABOUTZTITLErw  rY   rY   rZ   r     s    zRegexpChunkApp.aboutc             C   sJ   |d k	r| j j| | j jtt| j| j j  | jd | jd d S )Nr;   r   )ry   rz   r   r   rk   r   r   )rW   rN   rY   rY   rZ   r     s
    
zRegexpChunkApp.set_devset_sizec             C   sX   |d k	r| j j| | j j }| jjt| d | jjtdt| d d d d S )N)rN   rI   r   r3   i)r   rz   r   r   r  absr   r   )rW   rN   rY   rY   rZ   r     s
    
zRegexpChunkApp.resizec             O   s   t  r
dS | jj|| dS )z
        Enter the Tkinter mainloop.  This function must be called if
        this demo is created from a non-interactive program (e.g.
        from a secript); otherwise, the demo will close as soon as
        the script completes.
        N)r   ru   mainloop)rW   rG  kwargsrY   rY   rZ   r{    s    zRegexpChunkApp.mainloop)r$   r%   r&   )r'   r(   r)   )r*   r+   r,   z*<h1>Part of Speech Tags:</h1>
<hangindent>z4<h1>Part of Speech Tags:</h1>
<hangindent><<TAGSET>>B<h1>Part of Speech Tags:</h1>
<hangindent><<TAGSET>></hangindent>
)r-   r+   r}  )rJ   i)r   Nr\   r]   N)N)N)N)N)N)N)N)B__name__
__module____qualname____doc__re   r  rh  r	  r   r   r   r   r   r   r  r
  r  Z_FONT_PARAMSr   r   r   Z_HELPTAB_BG_COLORZ_HELPTAB_FG_COLORr0  r3  r  r[   r   r}   r{   r~   r   r   Z_DRAW_LINESr   r   r   r   r|   r   r   r   r   r  r8  r   r   rx   r  r   r  r<  rX  r   r>  r   r   r   rg  r   r   r   r   r   r   r{  rY   rY   rY   rZ   r   -   sL  
  %    "  

    
e	b K >?")
-3




!


r   c               C   s   t  j  d S )N)r   r{  rY   rY   rY   rZ   app  s    r  __main__)'r  randomrT   textwrapr   tkinterr   r   r   r   r   r   r   r	   r
   r   Ztkinter.filedialogr   r   Ztkinter.fontr   Z
nltk.chunkr   r   Znltk.chunk.regexpr   Znltk.corpusr   r   Znltk.draw.utilr   Z	nltk.treer   Z	nltk.utilr   r   r  r~  __all__rY   rY   rY   rZ   <module>   s:   0           3