3
d@                 @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ dd Zd	d
 Zdd ZG dd deZedkrd dlZej  dS )    N)ZipFilePathPointer)find_dir	find_filefind_jars_within_path)ParserI)DependencyGraph)taggedsents_to_conllc              C   sR   ddl m}  | d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLg}|jS )MNr   )RegexpTagger\.$.\,$,\?$?\($(\)$)\[$[\]$]^-?[0-9]+(\.[0-9]+)?$CD(The|the|A|a|An|an)$DT&(He|he|She|she|It|it|I|me|Me|You|you)$PRP(His|his|Her|her|Its|its)$PRP$(my|Your|your|Yours|yours)$ (on|On|in|In|at|At|since|Since)$IN (for|For|ago|Ago|before|Before)$(till|Till|until|Until)$(by|By|beside|Beside)$(under|Under|below|Below)$(over|Over|above|Above)$ (across|Across|through|Through)$(into|Into|towards|Towards)$(onto|Onto|from|From)$.*able$JJ.*ness$NN.*ly$RB.*s$NNS.*ing$VBG.*ed$VBD.*)r
   r   )r   r   )r   r   )r   r   )r   r   )r   r   )r   r   )r   r   )r   r   )r   r   )r   r   )r    r   )r!   r"   )r#   r"   )r$   r"   )r%   r"   )r&   r"   )r'   r"   )r(   r"   )r)   r"   )r*   r"   )r+   r,   )r-   r.   )r/   r0   )r1   r2   )r3   r4   )r5   r6   )r7   r.   )Znltk.tagr	   tag)r	   Z_tagger r9   //tmp/pip-build-v9q4h5k9/nltk/nltk/parse/malt.pymalt_regex_tagger   s>    r;   c             C   sx   t jj| r| }nt| dd}dddg}tt|}dd |D }dddh}|j|sZttt	d	d
 |sptt
|S )zE
    A module to find MaltParser .jar file and its dependencies.
    MALT_PARSER)env_vars c             S   s   h | ]}t jj|d  qS )   )ospathsplit).0jarr9   r9   r:   	<setcomp>H   s    z"find_maltparser.<locals>.<setcomp>z	log4j.jarz
libsvm.jarzliblinear-1.8.jarc             S   s   | j do| jdS )Nzmaltparser-z.jar)
startswithendswith)ir9   r9   r:   <lambda>M   s    z!find_maltparser.<locals>.<lambda>)r<   )r@   rA   existsr   setr   issubsetAssertionErroranyfilterlist)parser_dirnameZ	_malt_dirZmalt_dependenciesZ
_malt_jarsZ_jarsr9   r9   r:   find_maltparser=   s    

rR   c             C   s.   | dkrdS t jj| r| S t| dddS dS )z8
    A module to find pre-trained MaltParser model.
    Nzmalt_temp.mco
MALT_MODELF)r=   verbose)rS   )r@   rA   rJ   r   )model_filenamer9   r9   r:   find_malt_modelR   s
    rV   c               @   sZ   e Zd ZdZdddZddd	Zdd
dZdddZedddZ	dddZ
dddZdS )
MaltParsera  
    A class for dependency parsing with MaltParser. The input is the paths to:
    - (optionally) a maltparser directory
    - (optionally) the path to a pre-trained MaltParser .mco model file
    - (optionally) the tagger to use for POS tagging before parsing
    - (optionally) additional Java arguments

    Example:
        >>> from nltk.parse import malt
        >>> # With MALT_PARSER and MALT_MODEL environment set.
        >>> mp = malt.MaltParser(model_filename='engmalt.linear-1.7.mco') # doctest: +SKIP
        >>> mp.parse_one('I shot an elephant in my pajamas .'.split()).tree() # doctest: +SKIP
        (shot I (elephant an) (in (pajamas my)) .)
        >>> # Without MALT_PARSER and MALT_MODEL environment.
        >>> mp = malt.MaltParser('/home/user/maltparser-1.9.2/', '/home/user/engmalt.linear-1.7.mco') # doctest: +SKIP
        >>> mp.parse_one('I shot an elephant in my pajamas .'.split()).tree() # doctest: +SKIP
        (shot I (elephant an) (in (pajamas my)) .)
    r>   Nc             C   sT   t || _|dk	r|ng | _t|| _| jdk| _tj | _|dk	rH|nt	 | _
dS )a  
        An interface for parsing with the Malt Parser.

        :param parser_dirname: The path to the maltparser directory that
            contains the maltparser-1.x.jar
        :type parser_dirname: str
        :param model_filename: The name of the pre-trained model with .mco file
            extension. If provided, training will not be required.
            (see http://www.maltparser.org/mco/mco.html and
            see http://www.patful.com/chalk/node/185)
        :type model_filename: str
        :param tagger: The tagger used to POS tag the raw string before
            formatting to CONLL format. It should behave like `nltk.pos_tag`
        :type tagger: function
        :param additional_java_args: This is the additional Java arguments that
            one can use when calling Maltparser, usually this is the heapsize
            limits, e.g. `additional_java_args=['-Xmx1024m']`
            (see https://goo.gl/mpDBvQ)
        :type additional_java_args: list
        Nzmalt_temp.mco)rR   	malt_jarsadditional_java_argsrV   model_trainedtempfile
gettempdirworking_dirr;   tagger)selfrQ   rU   r_   rY   r9   r9   r:   __init__r   s    


zMaltParser.__init__Fnullc          !   c   sP  | j stdtjd| jddd
}tjd| jddd}xt|D ]}|jt| qFW |j  | j	|j
|j
dd}tj }ytjtjj| jd	  W n   Y nX | j||}	tj| |	d	krtd
dj||	f t|j
2}
x*|
j jdD ]}tt||dgV  qW W dQ R X W dQ R X W dQ R X tj|j
 tj|j
 dS )a  
        Use MaltParser to parse multiple POS tagged sentences. Takes multiple
        sentences where each sentence is a list of (word, tag) tuples.
        The sentences must have already been tokenized and tagged.

        :param sentences: Input sentences to parse
        :type sentence: list(list(tuple(str, str)))
        :return: iter(iter(``DependencyGraph``)) the dependency graph
            representation of each sentence
        z0Parser has not been trained. Call train() first.zmalt_input.conll.wF)prefixdirmodedeletezmalt_output.conll.parse)rf   r   z0MaltParser parsing (%s) failed with exit code %d z

)top_relation_labelN)r[   	Exceptionr\   NamedTemporaryFiler^   r   writestrclosegenerate_malt_commandnamer@   getcwdchdirrA   rB   rZ   _executejoinopenreaditerr   remove)r`   	sentencesrT   rj   
input_fileZoutput_filelinecmdZ_current_pathretinfileZtree_strr9   r9   r:   parse_tagged_sents   sD    
2zMaltParser.parse_tagged_sentsc                s"    fdd|D } j |||dS )an  
        Use MaltParser to parse multiple sentences.
        Takes a list of sentences, where each sentence is a list of words.
        Each sentence will be automatically tagged with this
        MaltParser instance's tagger.

        :param sentences: Input sentences to parse
        :type sentence: list(list(str))
        :return: iter(DependencyGraph)
        c             3   s   | ]} j |V  qd S )N)r_   )rC   Zsentence)r`   r9   r:   	<genexpr>   s    z)MaltParser.parse_sents.<locals>.<genexpr>)rj   )r   )r`   rz   rT   rj   Ztagged_sentencesr9   )r`   r:   parse_sents   s    zMaltParser.parse_sentsc             C   s   dg}|| j 7 }tjjdr dnd}|d|j| jg7 }|dg7 }tjj| j	rl|dtjj
| j	d g7 }n|d| j	g7 }|d	|g7 }|d
kr|d|g7 }|d|g7 }|S )a  
        This function generates the maltparser command use at the terminal.

        :param inputfilename: path to the input file
        :type inputfilename: str
        :param outputfilename: path to the output file
        :type outputfilename: str
        javawin;:z-cpzorg.maltparser.Maltz-cr?   z-irh   z-oz-m)rY   sysplatformrF   ru   rX   r@   rA   rJ   rZ   rB   )r`   ZinputfilenameZoutputfilenamerf   r}   Zclasspaths_separatorr9   r9   r:   rp      s    


z MaltParser.generate_malt_commandc             C   s&   |rd nt j}t j| ||d}|j S )N)stdoutstderr)
subprocessPIPEPopenwait)r}   rT   outputpr9   r9   r:   rt     s    zMaltParser._executec             C   sb   t jd| jddd(}djdd |D }|jt| W dQ R X | j|j|d	 tj	|j dS )
z
        Train MaltParser from a list of ``DependencyGraph`` objects

        :param depgraphs: list of ``DependencyGraph`` objects for training input data
        :type depgraphs: DependencyGraph
        zmalt_train.conll.rc   F)rd   re   rf   rg   
c             s   s   | ]}|j d V  qdS )
   N)Zto_conll)rC   Zdgr9   r9   r:   r      s    z#MaltParser.train.<locals>.<genexpr>N)rT   )
r\   rl   r^   ru   rm   rn   train_from_filerq   r@   ry   )r`   Z	depgraphsrT   r{   Z	input_strr9   r9   r:   train  s    	zMaltParser.trainc             C   s   t |tr`tjd| jddd<}|j }|j }|jt| W dQ R X | j	|j
|dS Q R X | j|dd}| j||}|d	krtd
dj||f d| _dS )z
        Train MaltParser from a file
        :param conll_file: str for the filename of the training input data
        :type conll_file: str
        zmalt_train.conll.rc   F)rd   re   rf   rg   N)rT   Zlearn)rf   r   z1MaltParser training (%s) failed with exit code %dri   T)
isinstancer   r\   rl   r^   rv   rw   rm   rn   r   rq   rp   rt   rk   ru   r[   )r`   Z
conll_filerT   r{   Zconll_input_fileZ	conll_strr}   r~   r9   r9   r:   r   '  s    	

zMaltParser.train_from_file)r>   NNN)Frb   )Frb   )NN)F)F)F)__name__
__module____qualname____doc__ra   r   r   rp   staticmethodrt   r   r   r9   r9   r9   r:   rW   ^   s      
%
B

 
rW   __main__)inspectr@   r   r   r\   Z	nltk.datar   Znltk.internalsr   r   r   Znltk.parse.apir   Znltk.parse.dependencygraphr   Znltk.parse.utilr   r;   rR   rV   rW   r   doctesttestmodr9   r9   r9   r:   <module>
   s"   & gC