3
dS                 @   sj   d Z ddlZddlZddlZddlmZ ddlZddlm	Z	 G dd dZ
G dd dZG d	d
 d
ZdS )a  
If you use the VADER sentiment analysis tools, please cite:

Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for
Sentiment Analysis of Social Media Text. Eighth International Conference on
Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
    N)product)pairwisec            C   @   s  e Zd ZdZdZdcZdZddZddddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?h;Zeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeed@BZ	dAdAdBdedCdfdgdDZ
ejdEejej dFZdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWgZdXdY Zdhd[d\Zdid^d_Zd`da ZdbS )jVaderConstantsz8
    A class to keep the Vader lists and constants.
    gn?g~jt?gGz?ZaintZarentZcannotZcantZcouldntZdarentZdidntZdoesntzain'tzaren'tzcan'tzcouldn'tzdaren'tzdidn'tzdoesn'tZdontZhadntZhasntZhaventZisntZmightntZmustntZneitherzdon'tzhadn'tzhasn'tzhaven'tzisn'tzmightn'tzmustn'tZneedntzneedn'tnevernoneZnopeZnornotZnothingZnowhereZoughtntZshantZshouldntZuhuhZwasntZwerentzoughtn'tzshan'tz	shouldn'tzuh-uhzwasn'tzweren'tZwithoutZwontZwouldntzwon'tzwouldn'tZrarelyZseldomZdespite)BZ
absolutelyZ	amazinglyZawfullyZ
completelyZconsiderablyZ	decidedlyZdeeplyZeffingZ
enormouslyZentirelyZ
especiallyZexceptionallyZ	extremelyZ
fabulouslyZflippingZflippinZfrickingZfrickinZfriggingZfrigginZfullyZfuckingZgreatlyZhellaZhighlyZhugelyZ
incrediblyZ	intenselyZmajorlyZmoreZmostZparticularlyZpurelyZquiteZreallyZ
remarkablysoZsubstantiallyZ
thoroughlyZtotallyZtremendouslyZuberZunbelievablyZ	unusuallyZutterlyveryZalmostZbarelyZhardlyzjust enoughzkind ofZkindaZkindofzkind-ofZlesslittleZ
marginallyZoccasionallyZpartlyZscarcelyZslightlyZsomewhatzsort ofZsortaZsortofzsort-of   g      ?   )zthe shitzthe bombzbad assz
yeah rightzcut the mustardzkiss of deathzhand to mouth[].!?,;:-'"z!!z!!!z??z???z?!?z!?!z?!?!z!?!?c             C   s   d S )N )selfr   r   4/tmp/pip-build-v9q4h5k9/nltk/nltk/sentiment/vader.py__init__   s    zVaderConstants.__init__Tc                sp   | j  t fdd|D r dS |r:tdd |D r:dS x0t|D ]$\}}|j dkrD|j dkrDdS qDW dS )z<
        Determine if input contains negation words
        c             3   s   | ]}|j   kV  qd S )N)lower).0word)	neg_wordsr   r   	<genexpr>   s    z)VaderConstants.negated.<locals>.<genexpr>Tc             s   s   | ]}d |j  kV  qdS )zn'tN)r   )r   r   r   r   r   r       s    leastatF)NEGATEanyr   r   )r   Zinput_wordsZ
include_ntfirstsecondr   )r   r   negated   s    zVaderConstants.negated   c             C   s   |t j|| |  }|S )z|
        Normalize the score to be between -1 and 1 using an alpha that
        approximates the max expected value
        )mathsqrt)r   ZscorealphaZ
norm_scorer   r   r   	normalize   s    zVaderConstants.normalizec             C   s^   d}|j  }|| jkrZ| j| }|dk r0|d9 }|j rZ|rZ|dkrP|| j7 }n
|| j8 }|S )zh
        Check if the preceding words increase, decrease, or negate/nullify the
        valence
        g        r      )r   BOOSTER_DICTisupperC_INCR)r   r   valenceis_cap_diffZscalarZ
word_lowerr   r   r   scalar_inc_dec   s    


zVaderConstants.scalar_inc_decNgnҿgGzg      r5   )T)r(   )__name__
__module____qualname____doc__ZB_INCRB_DECRr1   N_SCALARr#   r/   SPECIAL_CASE_IDIOMSrecompileescapestringpunctuationREGEX_REMOVE_PUNCTUATION	PUNC_LISTr   r'   r,   r4   r   r   r   r   r   !   s>  

r   c               @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )	SentiTextzL
    Identify sentiment-relevant string-level properties of input text.
    c             C   sF   t |tst|jd}|| _|| _|| _| j | _| j| j| _	d S )Nzutf-8)

isinstancestrencodetextrC   rB   _words_and_emoticonswords_and_emoticonsallcap_differentialr3   )r   rH   Z	punc_listZregex_remove_punctuationr   r   r   r     s    

zSentiText.__init__c             C   sd   | j jd| j}|j }dd |D }dd t| j|D }dd t|| jD }|}|j| |S )zt
        Returns mapping of form:
        {
            'cat,': 'cat',
            ',cat': 'cat',
        }
         c             S   s   h | ]}t |d kr|qS )r-   )len)r   wr   r   r   	<setcomp>#  s    z-SentiText._words_plus_punc.<locals>.<setcomp>c             S   s   i | ]}|d  dj |qS )r-   rL   )join)r   pr   r   r   
<dictcomp>%  s    z.SentiText._words_plus_punc.<locals>.<dictcomp>c             S   s   i | ]}|d  dj |qS )r   rL   )rP   )r   rQ   r   r   r   rR   &  s    )rB   subrH   splitr   rC   update)r   Zno_punc_textZ
words_onlyZpunc_beforeZ
punc_afterwords_punc_dictr   r   r   _words_plus_punc  s    
zSentiText._words_plus_puncc             C   sN   | j j }| j }dd |D }x(t|D ]\}}||kr*|| ||< q*W |S )z
        Removes leading and trailing puncutation
        Leaves contractions and most emoticons
            Does not preserve punc-plus-letter emoticons (e.g. :D)
        c             S   s   g | ]}t |d kr|qS )r-   )rM   )r   wer   r   r   
<listcomp>3  s    z2SentiText._words_and_emoticons.<locals>.<listcomp>)rH   rT   rW   	enumerate)r   ZwesrV   irX   r   r   r   rI   +  s    
zSentiText._words_and_emoticonsc             C   sV   d}d}x|D ]}|j  r|d7 }qW t|| }d|  k oHt|k n  rRd}|S )z
        Check whether just some words in the input are ALL CAPS

        :param list words: The words to inspect
        :returns: `True` if some but not all items in `words` are ALL CAPS
        Fr   r-   T)r0   rM   )r   wordsZis_differentZallcap_wordsr   Zcap_differentialr   r   r   rK   9  s    
zSentiText.allcap_differentialN)r6   r7   r8   r9   r   rW   rI   rK   r   r   r   r   rD     s
   rD   c               @   sz   e Zd ZdZdddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd ZdS )SentimentIntensityAnalyzerz8
    Give a sentiment intensity score to sentences.
    ;sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txtc             C   s$   t jj|| _| j | _t | _d S )N)nltkdataloadlexicon_filemake_lex_dictlexiconr   	constants)r   rb   r   r   r   r   P  s    
z#SentimentIntensityAnalyzer.__init__c             C   sD   i }x:| j jdD ]*}|j jddd \}}t|||< qW |S )z6
        Convert lexicon file to a dictionary
        
	r   r   )rb   rT   stripfloat)r   Zlex_dictliner   Zmeasurer   r   r   rc   X  s
    z(SentimentIntensityAnalyzer.make_lex_dictc             C   s   t || jj| jj}g }|j}xx|D ]p}d}|j|}|t|d k rf|j dkrf||d  j dksv|j | jjkr|j	| q$| j
|||||}q$W | j||}| j||S )z
        Return a float for sentiment strength based on the input text.
        Positive values are positive valence, negative value are negative
        valence.
        r   r-   kindZof)rD   re   rC   rB   rJ   indexrM   r   r/   appendsentiment_valence
_but_checkscore_valence)r   rH   	sentitext
sentimentsrJ   itemr2   r[   r   r   r   polarity_scoresb  s     


z*SentimentIntensityAnalyzer.polarity_scoresc             C   s$  |j }|j}|j }|| jkr| j| }|j rX|rX|dkrL|| jj7 }n|| jj8 }xtddD ]}	||	ko|||	d   j | jkrd| jj|||	d   ||}
|	dkr|
dkr|
d }
|	dkr|
dkr|
d }
||
 }| j	|||	|}|	dkrd| j
|||}qdW | j|||}|j| |S )Nr   r   r-   gffffff?r   g?)r3   rJ   r   rd   r0   re   r1   ranger4   _never_check_idioms_check_least_checkrm   )r   r2   rq   rs   r[   rr   r3   rJ   Zitem_lowercasestart_isr   r   r   rn     s6    
	
z,SentimentIntensityAnalyzer.sentiment_valencec             C   s   |dkrh||d  j  | jkrh||d  j  dkrh||d  j  dkr||d  j  dkr|| jj }n>|dkr||d  j  | jkr||d  j  dkr|| jj }|S )Nr-   r!   r   r"   r	   r   )r   rd   re   r;   )r   r2   rJ   r[   r   r   r   rx     s    z'SentimentIntensityAnalyzer._least_checkc             C   sv   dd |D }dht |@ }|rr|jtt|}x>t|D ]2\}}||k rZ|d ||< q<||kr<|d ||< q<W |S )Nc             S   s   g | ]}|j  qS r   )r   )r   Zw_er   r   r   rY     s    z9SentimentIntensityAnalyzer._but_check.<locals>.<listcomp>butg      ?g      ?)setrl   nextiterrZ   )r   rJ   rr   r{   ZbiZsidxZ	sentimentr   r   r   ro     s    z%SentimentIntensityAnalyzer._but_checkc             C   s  ||d   d||  }dj ||d  ||d  || }||d   d||d   }dj ||d  ||d  ||d  }dj ||d  ||d  }|||||g}	x&|	D ]}
|
| jjkr| jj|
 }P qW t|d |kr||  d||d   }|| jjkr| jj| }t|d |d krjdj || ||d  ||d  }|| jjkrj| jj| }|| jjks|| jjkr|| jj }|S )Nr-    z{} {} {}r   r   z{} {})formatre   r<   rM   r/   r:   )r   r2   rJ   r[   ZonezeroZ
twoonezeroZtwooneZthreetwooneZthreetwo	sequencesseqZzerooneZ
zeroonetwor   r   r   rw     s@    






z(SentimentIntensityAnalyzer._idioms_checkc             C   s"  |dkr*| j j||d  gr*|| j j }|dkr||d  dkrl||d  dksb||d  dkrl|d }n&| j j|||d   gr|| j j }|dkr||d  dkr||d  dks||d  dks||d  dks||d  dkr|d	 }n(| j j|||d   gr|| j j }|S )
Nr   r-   r   r   r   thisg      ?r   g      ?)re   r'   r;   )r   r2   rJ   ry   r[   r   r   r   rv     s(    


z'SentimentIntensityAnalyzer._never_checkc             C   s    | j |}| j|}|| }|S )N)_amplify_ep_amplify_qm)r   sum_srH   ep_amplifierqm_amplifierpunct_emph_amplifierr   r   r   _punctuation_emphasis  s    

z0SentimentIntensityAnalyzer._punctuation_emphasisc             C   s"   |j d}|dkrd}|d }|S )Nr      g㥛 ?)count)r   rH   Zep_countr   r   r   r   r   &  s
    
z&SentimentIntensityAnalyzer._amplify_epc             C   s0   |j d}d}|dkr,|dkr(|d }nd}|S )Nr   r   r-   r   g
ףp=
?gQ?)r   )r   rH   Zqm_countr   r   r   r   r   0  s    

z&SentimentIntensityAnalyzer._amplify_qmc             C   sd   d}d}d}xL|D ]D}|dkr.|t |d 7 }|dk rF|t |d 7 }|dkr|d7 }qW |||fS )Ng        r   r-   )ri   )r   rr   pos_sumneg_sum	neu_countZsentiment_scorer   r   r   _sift_sentiment_scores=  s    
z1SentimentIntensityAnalyzer._sift_sentiment_scoresc             C   s   |rt t|}| j||}|dkr.||7 }n|dk r>||8 }| jj|}| j|\}}}|tj|krr||7 }n|tj|k r||8 }|tj| | }	tj||	 }
tj||	 }tj||	 }nd}d}
d}d}t|dt|dt|
dt|dd}|S )Nr   g        r   r   )negneuposcompound)	ri   sumr   re   r,   r   r)   fabsround)r   rr   rH   r   r   r   r   r   r   totalr   r   r   Zsentiment_dictr   r   r   rp   O  s4    

z(SentimentIntensityAnalyzer.score_valenceN)r^   )r6   r7   r8   r9   r   rc   rt   rn   rx   ro   rw   rv   r   r   r   r   rp   r   r   r   r   r]   K  s   

21
r]   )r9   r)   r=   r@   	itertoolsr   Z	nltk.datar_   Z	nltk.utilr   r   rD   r]   r   r   r   r   <module>   s    gD