3
di                 @   s   d dl Z d dlmZ d dlmZ d dlmZ eddgdZe jede j	 d	d
 Z
e
jde jddddde jdddddde jddddde jddddde jdddd dd!d" ZdS )#    N)tqdm)word_tokenize)parallelize_preprocessz-hz--help)Zhelp_option_names)Zcontext_settingsc               C   s   d S )N r   r   r   (/tmp/pip-build-v9q4h5k9/nltk/nltk/cli.pycli   s    r   tokenizez
--languagez-lenz1The language for the Punkt sentence tokenization.)defaulthelpz--preserve-lineTzIAn option to keep the preserve the sentence and not sentence tokenize it.)r
   Zis_flagr   z--processesz-j   zNo. of processes.z
--encodingz-eutf8zSpecify encoding of file.z--delimiterz-d z%Specify delimiter to join the tokens.c       	      C   s   t jd|d}t jd|dp}|dkrXx`t|j D ]}t|jt|d|d q6W n2x0tt|j |ddD ]}t|j|d|d qnW W d	Q R X W d	Q R X d	S )
z;This command tokenizes text stream using nltk.word_tokenizestdin)encodingstdoutr   
)endfileT)Zprogress_barN)clickZget_text_streamr   	readlinesprintjoinr   r   )	languageZpreserve_lineZ	processesr   	delimiterZfinZfoutlineoutliner   r   r   tokenize_file   s    r   )r   r   Znltkr   Z	nltk.utilr   dictZCONTEXT_SETTINGSgroupZversion_optionr   commandoptionr   r   r   r   r   <module>   s,   
