3
df                 @   s,   d dl mZ d dlmZ G dd deZdS )    )load)StemmerIc               @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )RSLPStemmeruF  
    A stemmer for Portuguese.

        >>> from nltk.stem import RSLPStemmer
        >>> st = RSLPStemmer()
        >>> # opening lines of Erico Verissimo's "Música ao Longe"
        >>> text = '''
        ... Clarissa risca com giz no quadro-negro a paisagem que os alunos
        ... devem copiar . Uma casinha de porta e janela , em cima duma
        ... coxilha .'''
        >>> for token in text.split():
        ...     print(st.stem(token))
        clariss risc com giz no quadro-negr a pais que os alun dev copi .
        uma cas de port e janel , em cim dum coxilh .
    c             C   s   g | _ | j j| jd | j j| jd | j j| jd | j j| jd | j j| jd | j j| jd | j j| jd d S )Nzstep0.ptzstep1.ptzstep2.ptzstep3.ptzstep4.ptzstep5.ptzstep6.pt)_modelappend	read_rule)self r	   ./tmp/pip-build-v9q4h5k9/nltk/nltk/stem/rslp.py__init__5   s    zRSLPStemmer.__init__c             C   s   t d| ddjd}|jd}dd |D }dd |D }d	d |D }g }x|D ]x}g }|jd
}|j|d dd  |jt|d  |j|d dd  |jdd |d jdD  |j| qTW |S )Nznltk:stemmers/rslp/raw)formatutf8
c             S   s   g | ]}|d kr|qS ) r	   ).0liner	   r	   r
   
<listcomp>D   s    z)RSLPStemmer.read_rule.<locals>.<listcomp>c             S   s   g | ]}|d  dkr|qS )r   #r	   )r   r   r	   r	   r
   r   E   s    c             S   s   g | ]}|j d dqS )z			)replace)r   r   r	   r	   r
   r   H   s    r   r         c             S   s   g | ]}|d d qS )r   r	   )r   tokenr	   r	   r
   r   Z   s       ,r   r   )r   decodesplitr   int)r   filenameruleslinesr   ruletokensr	   r	   r
   r   @   s    


zRSLPStemmer.read_rulec             C   s   |j  }|d
 dkr | j|d}|d dkr8| j|d}| j|d}| j|d}|}| j|d}||kr|}| j|d}||kr| j|d	}|S )Nr   sr   ar   r            r   r   )lower
apply_rule)r   wordZ	prev_wordr	   r	   r
   stema   s    zRSLPStemmer.stemc             C   sv   | j | }xf|D ]^}t|d }|| d  |d krt|||d  kr||d kr|d |  |d  }P qW |S )Nr   r   r   r   )r   len)r   r,   Z
rule_indexr!   r#   Zsuffix_lengthr	   r	   r
   r+      s    

zRSLPStemmer.apply_ruleN)__name__
__module____qualname____doc__r   r   r-   r+   r	   r	   r	   r
   r   $   s
   !r   N)Z	nltk.datar   Znltk.stem.apir   r   r	   r	   r	   r
   <module>    s   