3
Ðéd(  ã            
   @   s^  d dl Z ddddgZddddgZd<d=d>d?gZe jjd	dd
dgƒdd„ ƒZe jjdeƒe jjd	ddddddgƒdd„ ƒƒZe jjdeƒe jjd	ddddddgƒdd„ ƒƒZe jjdeƒe jjddgƒe jjd	ddddddgƒdd„ ƒƒƒZ	e jjdeƒe jjddgƒe jjd	ddddddgƒdd„ ƒƒƒZ
e jjdeƒe jjd	ddddddgƒd d!„ ƒƒZe jjdeƒe jjd	ddddddgƒd"d#„ ƒƒZe jjd	d$gƒd%d&„ ƒZe jjd	d'gƒd(d)„ ƒZe jjd*eƒe jjd	ddddddgƒd+d,„ ƒƒZe jjd*eƒe jjd-d@gƒe jjd	ddddddgƒd.d/„ ƒƒƒZe jjd	d0d1d2d3d4d5gƒd6d7„ ƒZd8d9„ Zd:d;„ ZdS )Aé    Nú(ú[Ú{Ú*ú)ú]Ú}Útextz((ú<c             C   s    | |ƒ}t |ƒt |ƒkst‚d S )N)ÚlenÚAssertionError)Úuk_tokenizerr	   Útokens© r   úV/var/www/html/virt/lib64/python3.6/site-packages/spacy/tests/lang/uk/test_tokenizer.pyÚ$test_uk_tokenizer_handles_only_punct	   s    r   Úpunctu   ÐŸÑ€Ð¸Ð²ÐµÑ‚u   ÐŸÑ€Ð¸Ð²Ñ–Ñ‚u   ÒÐµÐ»Ò‘Ð¾Ñ‚Ð°Ñ‚Ð¸u   Ð—'Ñ”Ð´Ð½Ð°Ð½Ð½Ñu   Ð„Ð´Ð½Ñ–ÑÑ‚ÑŒu   Ñ—Ñ…Ð½Ñ–c             C   sD   | || ƒ}t |ƒdkst‚|d j|ks.t‚|d j|ks@t‚d S )Né   r   é   )r   r   r	   )r   r   r	   r   r   r   r   Ú#test_uk_tokenizer_splits_open_punct   s    r   c             C   sD   | || ƒ}t |ƒdkst‚|d j|ks.t‚|d j|ks@t‚d S )Nr   r   r   )r   r   r	   )r   r   r	   r   r   r   r   Ú$test_uk_tokenizer_splits_close_punct   s    r   Ú	punct_addú`c             C   sZ   | || | ƒ}t |ƒdks t‚|d j|ks2t‚|d j|ksDt‚|d j|ksVt‚d S )Né   r   r   r   )r   r   r	   )r   r   r   r	   r   r   r   r   Ú,test_uk_tokenizer_splits_two_diff_open_punct%   s
    r   ú'c             C   sZ   | || | ƒ}t |ƒdks t‚|d j|ks2t‚|d j|ksDt‚|d j|ksVt‚d S )Nr   r   r   r   )r   r   r	   )r   r   r   r	   r   r   r   r   Ú-test_uk_tokenizer_splits_two_diff_close_punct2   s
    r   c             C   sL   | || | | ƒ}t |ƒdks$t‚|d j|ks6t‚|d j|ksHt‚d S )Né   r   r   )r   r   r	   )r   r   r	   r   r   r   r   Ú(test_uk_tokenizer_splits_same_open_punct?   s    r   c             C   sL   | || | | ƒ}t |ƒdks$t‚|d j|ks6t‚|d j|ksHt‚d S )Nr   r   r   )r   r   r	   )r   r   r	   r   r   r   r   Ú)test_uk_tokenizer_splits_same_close_punctJ   s    r   u	   'Ð¢ÐµÑÑ‚c             C   s.   | |ƒ}t |ƒdkst‚|d jdks*t‚d S )Nr   r   r   )r   r   r	   )r   r	   r   r   r   r   Ú)test_uk_tokenizer_splits_open_appostropheU   s    r    u
   Ð¢ÐµÑÑ‚''c             C   s4   | |ƒ}t |ƒdkst‚| dƒ}t |ƒdks0t‚d S )Nr   z''r   )r   r   )r   r	   r   Ztokens_punctr   r   r   Ú)test_uk_tokenizer_splits_double_end_quote\   s    r!   zpunct_open,punct_closec             C   sZ   | || | ƒ}t |ƒdks t‚|d j|ks2t‚|d j|ksDt‚|d j|ksVt‚d S )Nr   r   r   r   )r   r   r	   )r   Ú
punct_openÚpunct_closer	   r   r   r   r   Ú)test_uk_tokenizer_splits_open_close_punctd   s
    r$   zpunct_open2,punct_close2c             C   s†   | || | | | ƒ}t |ƒdks(t‚|d j|ks:t‚|d j|ksLt‚|d j|ks^t‚|d j|kspt‚|d j|ks‚t‚d S )Né   r   r   r   r   r   )r   r   r	   )r   r"   r#   Zpunct_open2Zpunct_close2r	   r   r   r   r   Ú test_uk_tokenizer_two_diff_punctr   s    r&   u   ÐŸÑ€Ð¸Ð²ÐµÑ‚.u   ÐŸÑ€Ð¸Ð²Ñ–Ñ‚.u   ÒÐµÐ»Ò‘Ð¾Ñ‚Ð°Ñ‚Ð¸.u   Ð—'Ñ”Ð´Ð½Ð°Ð½Ð½Ñ.u   Ð„Ð´Ð½Ñ–ÑÑ‚ÑŒ.u	   Ñ—Ñ…Ð½Ñ–.c             C   s   | |ƒ}|d j dkst‚d S )Nr   Ú.)r	   r   )r   r	   r   r   r   r   Ú%test_uk_tokenizer_splits_trailing_dotƒ   s    r(   c             C   s*   d}| |ƒ}|t |ƒd  jdks&t‚d S )Nu+   (Ð Ð°Ð·, Ð´Ð²Ð°, Ñ‚Ñ€Ð¸, Ð¿Ñ€Ð¾Ð²ÐµÑ€ÐºÐ°).r   r'   )r   r	   r   )r   r	   r   r   r   r   Ú'test_uk_tokenizer_splits_bracket_period‹   s    r)   c             C   s4   d}| |ƒ}|d j dkst‚|d j dks0t‚d S )Nu?   Ð¥Ð»Ñ–Ð±Ñ–ÌÐ² Ð½Ðµ Ð±ÑƒÐ»Ð¾Ì. Ð¥Ð»Ñ–Ð±Ñ–ÌÐ² Ð½Ðµ Ð±ÑƒÐ»Ð¾Ì.r   u
   Ð±ÑƒÐ»Ð¾Ìr   r'   )r	   r   )r   r	   r   r   r   r   Ú*test_uk_tokenizer_handles_final_diacritics‘   s    r*   )r   r   )r   r   )r   r   )r   r   )r   r   )ZpytestZ
PUNCT_OPENZPUNCT_CLOSEZPUNCT_PAIREDÚmarkZparametrizer   r   r   r   r   r   r   r    r!   r$   r&   r(   r)   r*   r   r   r   r   Ú<module>   sJ   		 
 
		 