3
dp              	   @   s2  d Z ddlZddlZddlmZmZmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlmZmZ eddddddddgZG dd dZG dd dZG dd dZ G dd dZ!G dd de!Z"d d! Z#e$d"kr e#  dddddgZ%dS )#a  
This module brings together a variety of NLTK functionality for
text analysis, and provides simple, interactive interfaces.
Functionality includes: concordancing, collocation discovery,
regular expression search over tokenized strings, and
distributional similarity.
    N)Counterdefaultdict
namedtuple)reduce)log)BigramCollocationFinder)MLE)padded_everygram_pipeline)BigramAssocMeasures	f_measure)ConditionalFreqDist)FreqDist)sent_tokenize)LazyConcatenation	tokenwrapConcordanceLineleftqueryrightoffset
left_printright_printlinec               @   sT   e Zd ZdZedd Zdddd fddZd	d
 Zdd ZdddZ	dddZ
dS )ContextIndexa  
    A bidirectional index between words and their 'contexts' in a text.
    The context of a word is usually defined to be the words that occur
    in a fixed window around the word; but other definitions may also
    be used by providing a custom context function.
    c             C   sH   |dkr| |d  j  nd}|t| d kr<| |d  j  nd}||fS )z;One left token and one right token, normalized to lowercaser      z*START*z*END*)lowerlen)tokensir   r    r   )/tmp/pip-build-v9q4h5k9/nltk/nltk/text.py_default_context.   s    $zContextIndex._default_contextNc             C   s   | S )Nr   )xr   r   r    <lambda>5   s    zContextIndex.<lambda>c                sv   |_ _|r|_nj_ r6 fddD tfddtD _tfddtD _d S )Nc                s   g | ]} |r|qS r   r   ).0t)filterr   r    
<listcomp>=   s    z)ContextIndex.__init__.<locals>.<listcomp>c             3   s(   | ] \}} j | j|fV  qd S )N)_key_context_func)r$   r   w)selfr   r   r    	<genexpr>?   s    z(ContextIndex.__init__.<locals>.<genexpr>c             3   s(   | ] \}} j | j|fV  qd S )N)r)   r(   )r$   r   r*   )r+   r   r   r    r,   B   s    )r(   _tokensr)   r!   CFD	enumerate_word_to_contexts_context_to_words)r+   r   Zcontext_funcr&   keyr   )r&   r+   r   r    __init__5   s    zContextIndex.__init__c             C   s   | j S )zw
        :rtype: list(str)
        :return: The document that this context index was
            created from.
        )r-   )r+   r   r   r    r   E   s    zContextIndex.tokensc             C   sJ   | j |}t| j| }i }x(| jj D ]\}}t|t|||< q(W |S )z
        Return a dictionary mapping from words to 'similarity scores,'
        indicating how often these two words occur in the same
        context.
        )r(   setr0   itemsr   )r+   wordZword_contextsscoresr*   Z
w_contextsr   r   r    word_similarity_dictM   s    
z!ContextIndex.word_similarity_dict   c             C   s~   t t}x\| j| j| D ]H}xB| j| D ]4}||kr*||  | j| | | j| |  7  < q*W qW t||jddd | S )NT)r2   reverse)r   intr0   r(   r1   sortedget)r+   r6   nr7   cr*   r   r   r    similar_words\   s    (zContextIndex.similar_wordsFc                s   fddD fddD fddt tD }ttj |rf|rftddjn& spt S t fddD }|S d	S )
a  
        Find contexts where the specified words can all appear; and
        return a frequency distribution mapping each context to the
        number of times that context was used.

        :param words: The words used to seed the similarity search
        :type words: str
        :param fail_on_unknown: If true, then raise a value error if
            any of the given words do not occur at all in the index.
        c                s   g | ]} j |qS r   )r(   )r$   r*   )r+   r   r    r'   q   s    z0ContextIndex.common_contexts.<locals>.<listcomp>c                s   g | ]}t  j| qS r   )r4   r0   )r$   r*   )r+   r   r    r'   r   s    c                s   g | ]} | s| qS r   r   )r$   r   )contextswordsr   r    r'   s   s    z%The following word(s) were not found: c             3   s*   | ]"}j | D ]}| kr|V  qqd S )N)r0   )r$   r*   r?   )commonr+   r   r    r,   |   s    z/ContextIndex.common_contexts.<locals>.<genexpr>N)ranger   r   r4   intersection
ValueErrorjoinr   )r+   rB   Zfail_on_unknownemptyfdr   )rD   rA   r+   rB   r    common_contextsf   s    zContextIndex.common_contexts)r9   )F)__name__
__module____qualname____doc__staticmethodr!   r3   r   r8   r@   rK   r   r   r   r    r   &   s   

r   c               @   sL   e Zd ZdZdd fddZdd Zdd	 Zd
d ZdddZdddZ	dS )ConcordanceIndexzs
    An index that can be used to look up the offset locations at which
    a given word occurs in a document.
    c             C   s   | S )Nr   )r"   r   r   r    r#      s    zConcordanceIndex.<lambda>c             C   sJ   || _ || _tt| _x.t|D ]"\}}| j|}| j| j| q W dS )a  
        Construct a new concordance index.

        :param tokens: The document (list of tokens) that this
            concordance index was created from.  This list can be used
            to access the context of a given word occurrence.
        :param key: A function that maps each token to a normalized
            version that will be used as a key in the index.  E.g., if
            you use ``key=lambda s:s.lower()``, then the index will be
            case-insensitive.
        N)r-   r(   r   list_offsetsr/   append)r+   r   r2   indexr6   r   r   r    r3      s    

zConcordanceIndex.__init__c             C   s   | j S )z{
        :rtype: list(str)
        :return: The document that this concordance index was
            created from.
        )r-   )r+   r   r   r    r      s    zConcordanceIndex.tokensc             C   s   | j |}| j| S )z
        :rtype: list(int)
        :return: A list of the offset positions at which the given
            word occurs.  If a key function was specified for the
            index, then given word's key will be looked up.
        )r(   rS   )r+   r6   r   r   r    offsets   s    
zConcordanceIndex.offsetsc             C   s   dt | jt | jf S )Nz+<ConcordanceIndex for %d tokens (%d types)>)r   r-   rS   )r+   r   r   r    __repr__   s    zConcordanceIndex.__repr__P   c          	      sH  t |tr|}n|g}|tdj| d d }|d }g }| j|d }xBt|dd D ].\ } fdd| j|D }t|j|}q\W |rDx|D ] dj| j  t|  }	| jt	d |   }
| j t|  |  }dj|
| d }dj|d| }dj||	|g}t
|
|	| |||}|j| qW |S )	z
        Find all concordance lines given the query word.

        Provided with a list of words, these will be found as a phrase.
        rC         r   r   Nc                s   h | ]}|  d  qS )r   r   )r$   r   )r   r   r    	<setcomp>   s    z4ConcordanceIndex.find_concordance.<locals>.<setcomp>)
isinstancerR   r   rH   rV   r/   r<   rF   r-   maxr   rT   )r+   r6   widthphraseZ
half_widthcontextconcordance_listrV   Zword_offsetsZ
query_wordZleft_contextZright_contextr   r   Z
line_printconcordance_liner   )r   r    find_concordance   s8    

z!ConcordanceIndex.find_concordance   c             C   sp   | j ||d}|std nPt|t|}td| dt| d x&t|d| D ]\}}t|j qVW dS )a  
        Print concordance lines given the query word.
        :param word: The target word or phrase (a list of strings)
        :type word: str or list
        :param lines: The number of lines to display (default=25)
        :type lines: int
        :param width: The width of each line, in characters (default=80)
        :type width: int
        :param save: The option to save the concordance.
        :type save: bool
        )r^   z
no matcheszDisplaying z of z	 matches:N)rc   printminr   r/   r   )r+   r6   r^   linesra   r   rb   r   r   r    print_concordance   s    
z"ConcordanceIndex.print_concordanceN)rX   )rX   rd   )
rL   rM   rN   rO   r3   r   rV   rW   rc   rh   r   r   r   r    rQ      s   

,rQ   c               @   s    e Zd ZdZdd Zdd ZdS )TokenSearchera  
    A class that makes it easier to use regular expressions to search
    over tokenized strings.  The tokenized string is converted to a
    string where tokens are marked with angle brackets -- e.g.,
    ``'<the><window><is><still><open>'``.  The regular expression
    passed to the ``findall()`` method is modified to treat angle
    brackets as non-capturing parentheses, in addition to matching the
    token boundaries; and to have ``'.'`` not match the angle brackets.
    c             C   s   dj dd |D | _d S )N c             s   s   | ]}d | d V  qdS )<>Nr   )r$   r*   r   r   r    r,     s    z)TokenSearcher.__init__.<locals>.<genexpr>)rH   _raw)r+   r   r   r   r    r3     s    zTokenSearcher.__init__c             C   s   t jdd|}t jdd|}t jdd|}t jdd|}t j|| j}x*|D ]"}|jd rL|jdrLtd	qLW d
d |D }|S )a"  
        Find instances of the regular expression in the text.
        The text is a list of tokens, and a regexp pattern to match
        a single token must be surrounded by angle brackets.  E.g.

        >>> from nltk.text import TokenSearcher
        >>> print('hack'); from nltk.book import text1, text5, text9
        hack...
        >>> text5.findall("<.*><.*><bro>")
        you rule bro; telling you bro; u twizted bro
        >>> text1.findall("<a>(<.*>)<man>")
        monied; nervous; dangerous; white; white; white; pious; queer; good;
        mature; white; Cape; great; wise; wise; butterless; white; fiendish;
        pale; furious; better; certain; complete; dismasted; younger; brave;
        brave; brave; brave
        >>> text9.findall("<th.*>{3,}")
        thread through those; the thought that; that the thing; the thing
        that; that that thing; through these than through; them that the;
        through the thick; them that they; thought that the

        :param regexp: A regular expression
        :type regexp: str
        z\srj   rk   z(?:<(?:rl   z)>)z	(?<!\\)\.z[^>]z$Bad regexp for TokenSearcher.findallc             S   s   g | ]}|d d j dqS )r   z><)split)r$   hr   r   r    r'   1  s    z)TokenSearcher.findall.<locals>.<listcomp>)resubfindallrm   
startswithendswithrG   )r+   regexphitsrp   r   r   r    rs   
  s    
zTokenSearcher.findallN)rL   rM   rN   rO   r3   rs   r   r   r   r    ri      s   	ri   c               @   s   e Zd ZdZdZd6ddZdd Zdd	 Zd7ddZd8ddZ	d9ddZ
d:ddZdd Zdd Zdd Zd;ddZd<ddZd d! Zd=d#d$Zd>d'd(Zd)d* Zd+d, Zd-d. Zejd/Zd0d1 Zd2d3 Zd4d5 ZdS )?Texta  
    A wrapper around a sequence of simple (string) tokens, which is
    intended to support initial exploration of texts (via the
    interactive console).  Its methods perform a variety of analyses
    on the text's contexts (e.g., counting, concordancing, collocation
    discovery), and display the results.  If you wish to write a
    program which makes use of these analyses, then you should bypass
    the ``Text`` class, and use the appropriate analysis function or
    class directly instead.

    A ``Text`` is typically initialized from a given document or
    corpus.  E.g.:

    >>> import nltk.corpus
    >>> from nltk.text import Text
    >>> moby = Text(nltk.corpus.gutenberg.words('melville-moby_dick.txt'))

    TNc             C   s   | j rt|}|| _|r || _ndd|dd krb|dd jd}djdd |d| D | _n"djdd |dd	 D d
 | _dS )zv
        Create a Text object.

        :param tokens: The source text.
        :type tokens: sequence of str
        ]Nr9   rC   c             s   s   | ]}t |V  qd S )N)str)r$   tokr   r   r    r,   ^  s    z Text.__init__.<locals>.<genexpr>r   c             s   s   | ]}t |V  qd S )N)rz   )r$   r{   r   r   r    r,   `  s       z...)_COPY_TOKENSrR   r   namerU   rH   )r+   r   r~   endr   r   r    r3   O  s     zText.__init__c             C   s
   | j | S )N)r   )r+   r   r   r   r    __getitem__f  s    zText.__getitem__c             C   s
   t | jS )N)r   r   )r+   r   r   r    __len__i  s    zText.__len__O   rd   c             C   s.   d| j krt| jdd d| _| jj|||S )a  
        Prints a concordance for ``word`` with the specified context window.
        Word matching is not case-sensitive.

        :param word: The target word or phrase (a list of strings)
        :type word: str or list
        :param width: The width of each line, in characters (default=80)
        :type width: int
        :param lines: The number of lines to display (default=25)
        :type lines: int

        :seealso: ``ConcordanceIndex``
        _concordance_indexc             S   s   | j  S )N)r   )sr   r   r    r#     s    z"Text.concordance.<locals>.<lambda>)r2   )__dict__rQ   r   r   rh   )r+   r6   r^   rg   r   r   r    concordancep  s    
zText.concordancec             C   s4   d| j krt| jdd d| _| jj||d| S )a  
        Generate a concordance for ``word`` with the specified context window.
        Word matching is not case-sensitive.

        :param word: The target word or phrase (a list of strings)
        :type word: str or list
        :param width: The width of each line, in characters (default=80)
        :type width: int
        :param lines: The number of lines to display (default=25)
        :type lines: int

        :seealso: ``ConcordanceIndex``
        r   c             S   s   | j  S )N)r   )r   r   r   r    r#     s    z'Text.concordance_list.<locals>.<lambda>)r2   N)r   rQ   r   r   rc   )r+   r6   r^   rg   r   r   r    ra     s    
zText.concordance_listr9   rY   c                s   d| j ko| j|ko| j|ks|| _|| _ddlm} |jd tj| j|}|j	d |j
 fdd t }t|j|j|| _| jS )a  
        Return collocations derived from the text, ignoring stopwords.

            >>> from nltk.book import text4
            >>> text4.collocation_list()[:2]
            [('United', 'States'), ('fellow', 'citizens')]

        :param num: The maximum number of collocations to return.
        :type num: int
        :param window_size: The number of tokens spanned by a collocation (default=2)
        :type window_size: int
        :rtype: list(tuple(str, str))
        _collocationsr   )	stopwordsenglishrY   c                s   t | dk p| j  kS )N   )r   r   )r*   )ignored_wordsr   r    r#     s    z'Text.collocation_list.<locals>.<lambda>)r   Z_numZ_window_sizenltk.corpusr   rB   r   Z
from_wordsr   Zapply_freq_filterZapply_word_filterr
   rR   ZnbestZlikelihood_ratior   )r+   numwindow_sizer   finderZbigram_measuresr   )r   r    collocation_list  s    




zText.collocation_listc             C   s*   dd | j ||D }tt|dd dS )a  
        Print collocations derived from the text, ignoring stopwords.

            >>> from nltk.book import text4
            >>> text4.collocations() # doctest: +ELLIPSIS
            United States; fellow citizens; four years; ...

        :param num: The maximum number of collocations to print.
        :type num: int
        :param window_size: The number of tokens spanned by a collocation (default=2)
        :type window_size: int
        c             S   s   g | ]\}}|d  | qS )rC   r   )r$   w1w2r   r   r    r'     s    z%Text.collocations.<locals>.<listcomp>z; )	separatorN)r   re   r   )r+   r   r   Zcollocation_stringsr   r   r    collocations  s    zText.collocationsc             C   s   | j j|S )zJ
        Count the number of times this word appears in the text.
        )r   count)r+   r6   r   r   r    r     s    z
Text.countc             C   s   | j j|S )zQ
        Find the index of the first occurrence of the word in the text.
        )r   rU   )r+   r6   r   r   r    rU     s    z
Text.indexc             C   s   t d S )N)NotImplementedError)r+   methodr   r   r    readability  s    zText.readabilityc                s   d| j kr$t| jdd dd d| _j | jjj krt  t fddj D }dd	 |j	|D }t
t| nt
d
 dS )a~  
        Distributional similarity: find other words which appear in the
        same contexts as the specified word; list most similar words first.

        :param word: The word used to seed the similarity search
        :type word: str
        :param num: The number of words to generate (default=20)
        :type num: int
        :seealso: ContextIndex.similar_words()
        _word_context_indexc             S   s   | j  S )N)isalpha)r"   r   r   r    r#     s    zText.similar.<locals>.<lambda>c             S   s   | j  S )N)r   )r   r   r   r    r#     s    )r&   r2   c             3   s2   | ]*}| D ]}| kr|k r|V  qqd S )Nr   )r$   r*   r?   )rA   wcir6   r   r    r,     s   zText.similar.<locals>.<genexpr>c             S   s   g | ]\}}|qS r   r   )r$   r*   _r   r   r    r'     s    z Text.similar.<locals>.<listcomp>z
No matchesN)r   r   r   r   r   r0   Z
conditionsr4   r   most_commonre   r   )r+   r6   r   rJ   rB   r   )rA   r   r6   r    similar  s    
zText.similarc             C   s   d| j krt| jdd d| _yJ| jj|d}|s<td n*dd |j|D }ttd	d
 |D  W n* tk
r } zt| W Y dd}~X nX dS )aY  
        Find contexts where the specified words appear; list
        most frequent common contexts first.

        :param words: The words used to seed the similarity search
        :type words: str
        :param num: The number of words to generate (default=20)
        :type num: int
        :seealso: ContextIndex.common_contexts()
        r   c             S   s   | j  S )N)r   )r   r   r   r    r#     s    z&Text.common_contexts.<locals>.<lambda>)r2   TzNo common contexts were foundc             S   s   g | ]\}}|qS r   r   )r$   r*   r   r   r   r    r'     s    z(Text.common_contexts.<locals>.<listcomp>c             s   s   | ]\}}|d  | V  qdS )r   Nr   )r$   r   r   r   r   r    r,     s    z'Text.common_contexts.<locals>.<genexpr>N)	r   r   r   r   rK   re   r   r   rG   )r+   rB   r   rJ   Zranked_contextser   r   r    rK     s    

zText.common_contextsc             C   s   ddl m} || | dS )z
        Produce a plot showing the distribution of the words through the text.
        Requires pylab to be installed.

        :param words: The words to be plotted
        :type words: list(str)
        :seealso: nltk.draw.dispersion_plot()
        r   )dispersion_plotN)Z	nltk.drawr   )r+   rB   r   r   r   r    r     s    	zText.dispersion_plotr   c             C   s(   t ||\}}t|d}|j|| |S )N)order)r	   r   Zfit)r+   Ztokenized_sentsr>   Z
train_dataZpadded_sentsmodelr   r   r    _train_default_ngram_lm*  s    
zText._train_default_ngram_lmd   *   c       	      C   s   dd t dj| jD | _t| dsFtdtjd | j| jdd| _	g }|d	ksZt
d
xZt||k rx@t| j	j|||dD ]&\}}|dkrq|dkrP |j| qW |d7 }q\W |rdj|d nd}|t|d|  }t| |S )a  
        Print random text, generated using a trigram language model.
        See also `help(nltk.lm)`.

        :param length: The length of text to generate (default=100)
        :type length: int

        :param text_seed: Generation can be conditioned on preceding context.
        :type text_seed: list(str)

        :param random_seed: A random seed or an instance of `random.Random`. If provided,
            makes the random sampling part of generation reproducible. (default=42)
        :type random_seed: int
        c             S   s   g | ]}|j d qS )rC   )ro   )r$   sentr   r   r    r'   A  s    z!Text.generate.<locals>.<listcomp>rC   _trigram_modelzBuilding ngram index...)filer   )r>   r   z!The `length` must be more than 0.)	text_seedrandom_seedz<s>z</s>r   rj   N)r   rH   r   Z_tokenized_sentshasattrre   sysstderrr   r   AssertionErrorr   r/   generaterT   r   )	r+   lengthr   r   Zgenerated_tokensidxtokenprefixZ
output_strr   r   r    r   0  s*    
zText.generatec             G   s   | j  j| S )zc
        See documentation for FreqDist.plot()
        :seealso: nltk.prob.FreqDist.plot()
        )vocabplot)r+   argsr   r   r    r   ^  s    z	Text.plotc             C   s   d| j krt| | _| jS )z.
        :seealso: nltk.prob.FreqDist
        _vocab)r   r   r   )r+   r   r   r    r   e  s    

z
Text.vocabc             C   s@   d| j krt| | _| jj|}dd |D }tt|d dS )a  
        Find instances of the regular expression in the text.
        The text is a list of tokens, and a regexp pattern to match
        a single token must be surrounded by angle brackets.  E.g.

        >>> print('hack'); from nltk.book import text1, text5, text9
        hack...
        >>> text5.findall("<.*><.*><bro>")
        you rule bro; telling you bro; u twizted bro
        >>> text1.findall("<a>(<.*>)<man>")
        monied; nervous; dangerous; white; white; white; pious; queer; good;
        mature; white; Cape; great; wise; wise; butterless; white; fiendish;
        pale; furious; better; certain; complete; dismasted; younger; brave;
        brave; brave; brave
        >>> text9.findall("<th.*>{3,}")
        thread through those; the thought that; that the thing; the thing
        that; that that thing; through these than through; them that the;
        through the thick; them that they; thought that the

        :param regexp: A regular expression
        :type regexp: str
        _token_searcherc             S   s   g | ]}d j |qS )rC   )rH   )r$   rp   r   r   r    r'     s    z Text.findall.<locals>.<listcomp>z; N)r   ri   r   rs   re   r   )r+   rv   rw   r   r   r    rs   n  s
    

zText.findallz\w+|[\.\!\?]c             C   s   |d }x&|dkr.| j j||  r.|d8 }q
W |dkr@|| nd}|d }x*|t|k rv| j j||  rv|d7 }qNW |t|kr|| nd}||fS )z
        One left & one right token, both case-normalized.  Skip over
        non-sentence-final punctuation.  Used by the ``ContextIndex``
        that is created for ``similar()`` and ``common_contexts()``.
        r   r   z*START*z*END*)_CONTEXT_REmatchr   )r+   r   r   jr   r   r   r   r    _context  s     zText._contextc             C   s
   d| j  S )Nz
<Text: %s>)r~   )r+   r   r   r    __str__  s    zText.__str__c             C   s
   d| j  S )Nz
<Text: %s>)r~   )r+   r   r   r    rW     s    zText.__repr__)N)r   rd   )r   rd   )r9   rY   )r9   rY   )r9   )r9   )r   )r   Nr   )rL   rM   rN   rO   r}   r3   r   r   r   ra   r   r   r   rU   r   r   rK   r   r   r   r   r   rs   rq   compiler   r   r   rW   r   r   r   r    rx   5  s0   



#

"


.	#
rx   c               @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )TextCollectionaV  A collection of texts, which can be loaded with list of texts, or
    with a corpus consisting of one or more texts, and which supports
    counting, concordancing, collocation discovery, etc.  Initialize a
    TextCollection as follows:

    >>> import nltk.corpus
    >>> from nltk.text import TextCollection
    >>> print('hack'); from nltk.book import text1, text2, text3
    hack...
    >>> gutenberg = TextCollection(nltk.corpus.gutenberg)
    >>> mytexts = TextCollection([text1, text2, text3])

    Iterating over a TextCollection produces all the tokens of all the
    texts in order.
    c                s@   t  dr  fdd j D   | _tj| t  i | _d S )NrB   c                s   g | ]} j |qS r   )rB   )r$   f)sourcer   r    r'     s    z+TextCollection.__init__.<locals>.<listcomp>)r   Zfileids_textsrx   r3   r   
_idf_cache)r+   r   r   )r   r    r3     s
    
zTextCollection.__init__c             C   s   |j |t| S )z"The frequency of the term in text.)r   r   )r+   termtextr   r   r    tf  s    zTextCollection.tfc                sj   | j j }|dkrft fdd| jD }t| jdkrBtd|rXtt| j| nd}|| j  < |S )zThe number of texts in the corpus divided by the
        number of texts that the term appears in.
        If a term does not appear in the corpus, 0.0 is returned.Nc                s   g | ]} |krd qS )Tr   )r$   r   )r   r   r    r'     s    z&TextCollection.idf.<locals>.<listcomp>r   z+IDF undefined for empty document collectiong        )r   r=   r   r   rG   r   )r+   r   idfmatchesr   )r   r    r     s    
zTextCollection.idfc             C   s   | j ||| j| S )N)r   r   )r+   r   r   r   r   r    tf_idf  s    zTextCollection.tf_idfN)rL   rM   rN   rO   r3   r   r   r   r   r   r   r    r     s
   r   c              C   s   ddl m}  t| jdd}t| t  td |jd t  td |jd t  td |j  t  td |jdd	d
dg t  td |j	d t  td td|d  td|dd  td|j
 d  d S )Nr   )brownnews)
categorieszConcordance:zDistributionally similar words:zCollocations:zDispersion plot:reportZsaidZ	announcedzVocabulary plot:2   z	Indexing:ztext[3]:r   z
text[3:5]:   ztext.vocab()['news']:)r   r   rx   rB   re   r   r   r   r   r   r   )r   r   r   r   r    demo  s.    


r   __main__)&rO   rq   r   collectionsr   r   r   	functoolsr   mathr   Znltk.collocationsr   Znltk.lmr   Znltk.lm.preprocessingr	   Znltk.metricsr
   r   Znltk.probabilityr   r.   r   Znltk.tokenizer   Z	nltk.utilr   r   r   r   rQ   ri   rx   r   r   rL   __all__r   r   r   r    <module>   s@   [{9   /
