3
­d'  ã               @   s¬   d Z ddlmZmZ ddlmZmZmZ G dd„ deƒZG dd„ deƒZ	G dd	„ d	e	ƒZ
G d
d„ deƒZG dd„ deƒZG dd„ deƒZG dd„ deƒZG dd„ deƒZdS )zLanguage Modelsé    )ÚLanguageModelÚ	Smoothing)ÚAbsoluteDiscountingÚ	KneserNeyÚ
WittenBellc               @   s   e Zd ZdZddd„ZdS )ÚMLEzbClass for providing MLE ngram model scores.

    Inherits initialization from BaseNgramModel.
    Nc             C   s   | j |ƒj|ƒS )zÃReturns the MLE score for a word given a context.

        Args:
        - word is expected to be a string
        - context is expected to be something reasonably convertible to a tuple
        )Úcontext_countsÚfreq)ÚselfÚwordÚcontext© r   ú./tmp/pip-build-v9q4h5k9/nltk/nltk/lm/models.pyÚunmasked_score   s    zMLE.unmasked_score)N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r   r      s   r   c                   s*   e Zd ZdZ‡ fdd„Zddd„Z‡  ZS )ÚLidstonez«Provides Lidstone-smoothed scores.

    In addition to initialization arguments from BaseNgramModel also requires
    a number by which to increase the counts, gamma.
    c                s   t ƒ j||Ž || _d S )N)ÚsuperÚ__init__Úgamma)r
   r   ÚargsÚkwargs)Ú	__class__r   r   r   %   s    zLidstone.__init__Nc             C   s8   | j |ƒ}|| }|jƒ }|| j |t| jƒ| j   S )ztAdd-one smoothing: Lidstone or Laplace.

        To see what kind, look at `gamma` attribute on the class.

        )r   ÚNr   ÚlenÚvocab)r
   r   r   ÚcountsÚ
word_countÚ
norm_countr   r   r   r   )   s    
zLidstone.unmasked_score)N)r   r   r   r   r   r   Ú__classcell__r   r   )r   r   r      s   r   c                   s    e Zd ZdZ‡ fdd„Z‡  ZS )ÚLaplacezwImplements Laplace (add one) smoothing.

    Initialization identical to BaseNgramModel because gamma is always 1.
    c                s   t ƒ jd|ž|Ž d S )Né   )r#   )r   r   )r
   r   r   )r   r   r   r   ;   s    zLaplace.__init__)r   r   r   r   r   r!   r   r   )r   r   r"   5   s   r"   c                   s,   e Zd ZdZd‡ fdd„	Zd	dd„Z‡  ZS )
ÚStupidBackoffa8  Provides StupidBackoff scores.

    In addition to initialization arguments from BaseNgramModel also requires
    a parameter alpha with which we scale the lower order probabilities.
    Note that this is not a true probability distribution as scores for ngrams
    of the same order do not sum up to unity.
    çš™™™™™Ù?c                s   t ƒ j||Ž || _d S )N)r   r   Úalpha)r
   r&   r   r   )r   r   r   r   H   s    zStupidBackoff.__init__Nc             C   sZ   |s| j jj|ƒS | j|ƒ}|| }|jƒ }|dkr<|| S | j| j||dd … ƒ S d S )Nr   r#   )r   Zunigramsr	   r   r   r&   r   )r
   r   r   r   r   r    r   r   r   r   L   s    
zStupidBackoff.unmasked_score)r%   )N)r   r   r   r   r   r   r!   r   r   )r   r   r$   ?   s   r$   c                   s*   e Zd ZdZ‡ fdd„Zddd„Z‡  ZS )ÚInterpolatedLanguageModelz¡Logic common to all interpolated language models.

    The idea to abstract this comes from Chen & Goodman 1995.
    Do not instantiate this class directly!
    c                s4   |j di ƒ}tƒ j|f|Ž || j| jf|Ž| _d S )NÚparams)Úpopr   r   r   r   Ú	estimator)r
   Zsmoothing_clsÚorderr   r(   )r   r   r   r   `   s    z"InterpolatedLanguageModel.__init__Nc             C   sR   |s| j j|ƒS | j| s$d\}}n| j j||ƒ\}}||| j||dd … ƒ  S )Nr   r#   )r   r#   )r*   Zunigram_scorer   Zalpha_gammar   )r
   r   r   r&   r   r   r   r   r   e   s    

z(InterpolatedLanguageModel.unmasked_score)N)r   r   r   r   r   r   r!   r   r   )r   r   r'   Y   s   r'   c                   s    e Zd ZdZ‡ fdd„Z‡  ZS )ÚWittenBellInterpolatedz.Interpolated version of Witten-Bell smoothing.c                s   t ƒ jt|f|Ž d S )N)r   r   r   )r
   r+   r   )r   r   r   r   v   s    zWittenBellInterpolated.__init__)r   r   r   r   r   r!   r   r   )r   r   r,   s   s   r,   c                   s"   e Zd ZdZd‡ fdd„	Z‡  ZS )ÚAbsoluteDiscountingInterpolatedz9Interpolated version of smoothing with absolute discount.ç      è?c                s"   t ƒ jt|fdd|ii|—Ž d S )Nr(   Údiscount)r   r   r   )r
   r+   r/   r   )r   r   r   r   }   s    z(AbsoluteDiscountingInterpolated.__init__)r.   )r   r   r   r   r   r!   r   r   )r   r   r-   z   s   r-   c                   s"   e Zd ZdZd‡ fdd„	Z‡  ZS )ÚKneserNeyInterpolatedz-Interpolated version of Kneser-Ney smoothing.çš™™™™™¹?c                sD   d|  kodkn  s t dƒ‚tƒ jt|fd||dœi|—Ž d S )Nr   r#   zCDiscount must be between 0 and 1 for probabilities to sum to unity.r(   )r/   r+   )Ú
ValueErrorr   r   r   )r
   r+   r/   r   )r   r   r   r   †   s
    zKneserNeyInterpolated.__init__)r1   )r   r   r   r   r   r!   r   r   )r   r   r0   ƒ   s   r0   N)r   Znltk.lm.apir   r   Znltk.lm.smoothingr   r   r   r   r   r"   r$   r'   r,   r-   r0   r   r   r   r   Ú<module>   s   
	