3
dR                 @   s  d Z ddlmZ ddlmZ ddlmZmZmZ ddl	m
Z
mZ ddlmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ dddddgZeZejejejejejdZejejejejejej ej!gZ"e"ej#ej$g Z%edededgZ&e"ej$g e& Z'ejej ejgZ(e"ej#g e& Z)e"ej#ej$g e& Z*ejejej gZ+dZ,e,dfZ-e,dfZ.G dd deZ/G dd de0Z1G dd dej2Z3dd Z4G dd deZ5dS )z
An RDF/XML parser for RDFLib
    )make_parser)ErrorHandler)handler	quoteattrescape)	urldefragurljoin)RDF	is_ncname)URIRef)BNode)Literal)ParserErrorError)Parsercreate_parserBagIDElementHandlerRDFXMLHandlerRDFXMLParser)aboutIDtyperesource	parseTypez4http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachz:http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefixz0http://www.w3.org/1999/02/22-rdf-syntax-ns#bagIDz$http://www.w3.org/XML/1998/namespacebaselangc                   s*   e Zd ZdgZ fddZdd Z  ZS )r   lic                s   t t| j| d| _d S )Nr   )superr   __init__r   )selfval)	__class__ ?/tmp/pip-build-7vycvbft/rdflib/rdflib/plugins/parsers/rdfxml.pyr   =   s    zBagID.__init__c             C   s   |  j d7  _ td| j   S )N   z_%s)r   RDFNS)r    r#   r#   r$   next_liA   s    zBagID.next_li)__name__
__module____qualname__	__slots__r   r'   __classcell__r#   r#   )r"   r$   r   :   s   c               @   s<   e Zd Zddddddddd	d
ddddgZdd Zdd ZdS )r   startcharendr   idr   subject	predicateobjectlistlanguagedatatypedeclareddatac             C   sR   d | _ d | _d | _d| _d | _d | _d | _d | _d | _d | _	d | _
d | _d | _d S )Nr   )r-   r.   r/   r   r0   r   r1   r3   r4   r5   r6   r7   r8   )r    r#   r#   r$   r   K   s    zElementHandler.__init__c             C   s   |  j d7  _ td| j   S )Nr%   z_%s)r   r&   )r    r#   r#   r$   r'   Z   s    zElementHandler.next_liN)r(   r)   r*   r+   r   r'   r#   r#   r#   r$   r   F   s
   
c               @   s  e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd ZeeZdd ZeeZdd  ZeeZd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Z d5d6 Z!d7d8 Z"d9S ):r   c             C   s   || _ d| _| j  d S )NF)storepreserve_bnode_idsreset)r    r9   r#   r#   r$   r   a   s    zRDFXMLHandler.__init__c             C   sF   t  }| j|_dd |_d |g| _i | _i | _i g| _| jd | _d S )Nc             S   s   d S )Nr#   )nameqnamer#   r#   r$   <lambda>i   s    z%RDFXMLHandler.reset.<locals>.<lambda>r%   )	r   document_element_startr-   r/   stackidsbnode_ns_contexts_current_context)r    Zdocument_elementr#   r#   r$   r;   f   s    

zRDFXMLHandler.resetc             C   s
   || _ d S )N)locator)r    rF   r#   r#   r$   setDocumentLocatorr   s    z RDFXMLHandler.setDocumentLocatorc             C   s   d S )Nr#   )r    r#   r#   r$   startDocumentu   s    zRDFXMLHandler.startDocumentc             C   s6   | j j| jj  || j|< | jj||p*ddd d S )N F)override)rD   appendrE   copyr9   bind)r    prefix	namespacer#   r#   r$   startPrefixMappingx   s    
z RDFXMLHandler.startPrefixMappingc             C   s   | j d | _| j d= d S )Nr%   r?   r?   )rD   rE   )r    rN   r#   r#   r$   endPrefixMapping}   s    zRDFXMLHandler.endPrefixMappingc             C   s   | j }|jt  | j}| j}|jtd }|d k	rzt|\}}|rV|jrVt	|j|}q| j
j ph| j
j }	|	rt	|	|}n6|r|j}|d kr| j
j p| j
j }	|	rt|	\}}||_|jtd }
|
d kr|r|j}
|
|_|j||| d S )N)rA   rK   r   currentparentgetBASEr   r   r   rF   getPublicIdgetSystemIdLANGr5   r-   )r    r<   r=   attrsrA   rR   rS   r   fragsystemIdr5   r#   r#   r$   startElementNS   s6    




zRDFXMLHandler.startElementNSc             C   s   | j j|| | jj  d S )N)rR   r/   rA   pop)r    r<   r=   r#   r#   r$   endElementNS   s    zRDFXMLHandler.endElementNSc             C   s   | j j}|r|| d S )N)rR   r.   )r    contentr.   r#   r#   r$   
characters   s    zRDFXMLHandler.charactersc             C   s   d S )Nr#   )r    r_   r#   r#   r$   ignorableWhitespace   s    z!RDFXMLHandler.ignorableWhitespacec             C   s   d S )Nr#   )r    targetr8   r#   r#   r$   processingInstruction   s    z#RDFXMLHandler.processingInstructionc             C   s`   |\}}}| j j|tjtjf | j j|tj|f | j j|tj|f | j j|tj|f d S )N)r9   addr	   r   Z	Statementr1   r2   r3   )r    ZsidZspospor#   r#   r$   add_reified   s
    
zRDFXMLHandler.add_reifiedc             C   s0   | j }d|j |j |j f }t|| d S )Nz
%s:%s:%s: )rF   rW   getLineNumbergetColumnNumberr   )r    messagerF   infor#   r#   r$   error   s
    zRDFXMLHandler.errorc             C   s
   | j d S )N   )rA   )r    r#   r#   r$   get_current   s    zRDFXMLHandler.get_currentc             C   s
   | j d S )Nr%   r?   )rA   )r    r#   r#   r$   get_next   s    zRDFXMLHandler.get_nextc             C   s
   | j d S )N   )rA   )r    r#   r#   r$   
get_parent   s    zRDFXMLHandler.get_parentc             C   s>   t | jj|dd}|r6|d dkr6|d dkr6d| }t|S )Nr%   )allow_fragments#z%s#r?   r?   )r   rR   r   r   )r    uriresultr#   r#   r$   
absolutize   s    zRDFXMLHandler.absolutizec             C   s   |d d krt |d }nt dj|}i }xv|j D ]j\}}|d d krT|d }n
dj|}|jts|dd j dkr~q6|tkr||t| < q6||t |< q6W ||fS )Nr   r%   rI   rr   xml)r   joinitems
startswithXMLNSlowerUNQUALIFIEDr&   )r    r<   r=   rY   attsnvattr#   r#   r$   convert   s    

zRDFXMLHandler.convertc             C   sJ   |d r8t dj|tjkr8t| d}| j|_| j|_n| j||| d S )Nr   rI   next)r   r{   r	   getattrnode_element_startr-   node_element_endr/   )r    r<   r=   rY   r   r#   r#   r$   r@      s
    

z$RDFXMLHandler.document_element_startc             C   s  | j |||\}}| j}| j}t| d}| j|_| j|_|tkrN| j	d|  t
j|krt
j|kslt
j|krv| j	d |t
j }t|s| j	d|  |d| }	|	| jkr| j	d|	  d| j|	< nt
j|krRt
j|kst
j|kr| j	d |t
j }
t|
s| j	d|
  | jd	krH|
| jkr6| j|
 }	nt }	|	| j|
< nt|
}	nDt
j|krt
j|ksvt
j|kr| j	d ||t
j }	nt }	|t
jkr| jj|	t
j||f |j}x|D ]}|jtts,||}yt|| |}W n0 tk
r( } z| j	|j W Y d d }~X nX n|t
jkrNt
j}||t
j }nz|tkr^qnj|tkr|| j	d
|  qnL||}yt|| |}W n0 tk
r } z| j	|j W Y d d }~X nX | jj|	||f qW |	|_ d S )Nr   zInvalid node element URI: %sz9Can have at most one of rdf:ID, rdf:about, and rdf:nodeIDz&rdf:ID value is not a valid NCName: %sz#%sz)two elements cannot use the same ID: '%s'r%   z*rdf:nodeID value is not a valid NCName: %sFz"Invalid property attribute URI: %s)!r   rR   ry   r   property_element_startr-   property_element_endr/   NODE_ELEMENT_EXCEPTIONSrm   r	   r   r   nodeIDr
   rB   r:   rC   r   Descriptionr9   rd   r   r5   r}   strr&   r   r   msgNODE_ELEMENT_ATTRIBUTESPROPERTY_ATTRIBUTE_EXCEPTIONSr1   )r    r<   r=   rY   r   rR   ry   r   r0   r1   r   r5   r   r2   r3   er#   r#   r$   r      s    








 

z RDFXMLHandler.node_element_startc             C   s<   | j jr,| j| jd kr,| jddj|  | jj| j _d S )Nrn   z1Repeat node-elements inside property elements: %srI   )rS   r3   rR   rA   rm   r{   r1   )r    r<   r=   r#   r#   r$   r   G  s    zRDFXMLHandler.node_element_endc             C   s  | j |||\}}| j}| j}t| d}d }d |_d |_|jttsR|||_	n8|t
jkrh|j |_	n"|tkr| jd|  n
|||_	|jt
jd }	|	d k	rt|	s| jd|	  |d|	 |_nd |_|jt
jd }
|jt
jd }|jt
jd }|
d k	r|d k	r| jd |
d k	r:||
}| j|_| j|_nv|d k	rt|s\| jd|  | jdkr|| jkr| j| }nt }|| j|< |}nt| }}| j|_| j|_n|d k	rx4|D ],}|t
jkr|t
jkr| jd|  qW |d	kr$t  |_}| j|_| j |_| j!|_nf|d
krRd |_t
j" }|_| j|_| j#|_n8t$dt
j%d}| j&|_t'di|_(| j)|_| j&|_| j*|_||_+d S d }| j|_| j|_| j|_|jt
j,d  }|_,|j-}|d k	r||}nx|D ]}|jtts||}n2|t.krqn"|t/kr,| jd|  n||}|t
j0krNt1|| }n|d k	r\d }t$|| ||}|d kr|t }| j2j3|||f qW |d krd|_d |_+nd |_||_+d S )Nr   z Invalid property element URI: %sz&rdf:ID value is not a value NCName: %sz#%sz=Property element cannot have both rdf:nodeID and rdf:resourcez*rdf:nodeID value is not a valid NCName: %sFz#Property attr '%s' now allowed hereResource
CollectionrI   )r6   rz   z"Invalid property attribute URI: %s)4r   rR   ry   r   r8   r4   r}   r   r&   r2   r	   r   r'   PROPERTY_ELEMENT_EXCEPTIONSrm   rT   r   r
   r0   r   r   r   r   r-   r   r/   r:   rC   r   r1   property_element_charr.   r   r   nillist_node_element_endr   Z
XMLLiteralliteral_element_charr~   r7   literal_element_startliteral_element_endr3   r6   r5   PROPERTY_ELEMENT_ATTRIBUTESr   r   r   r9   rd   )r    r<   r=   rY   r   rR   ry   r   r3   r0   r   r   Z
parse_typer1   r   r6   r5   r2   rg   r#   r#   r$   r   Q  s    
























z$RDFXMLHandler.property_element_startc             C   s"   | j }|jd k	r| j|7  _d S )N)rR   r8   )r    r8   rR   r#   r#   r$   r     s    
z#RDFXMLHandler.property_element_charc             C   s   | j }|jd k	rF|jd krF|j}|jd k	r.d }t|j||j|_d |_| jj| jkrx|jt	j
krx| jj|jt	jt	j
f |jd k	r| jj| jj|j|jf |jd k	r| j|j| jj|j|jf d |_d S )N)rR   r8   r3   r5   r6   r   r   r/   r   r	   r   r9   rd   r4   restrS   r1   r2   r0   rh   )r    r<   r=   rR   ZliteralLangr#   r#   r$   r     s$    


z"RDFXMLHandler.property_element_endc             C   s   | j }| jjtjkrNt }|| j_| jj| jjtj|j	f || j_
d | j_n<t }| jj| jjtj|f | jj|tj|j	f || j_d S )N)rR   rS   r4   r	   r   r   r9   rd   firstr1   r3   r.   r   )r    r<   r=   rR   r4   r#   r#   r$   r     s    
z#RDFXMLHandler.list_node_element_endc             C   sh  | j }| j| j_| j| j_| j| j_| jj	j
 |_	|d r| j|d  }|r`d||d f |_nd|d  |_|d |j	kr||j	|d < |r| jd||d f 7  _q| jd|d  7  _nd|d  |_x|j D ]x\}}|d r0|d |j	kr| j|d  |j	|d < |j	|d  d |d  }n|d }| jd|t|f 7  _qW | jd	7  _d S )
Nr   z<%s:%sr%   z<%sz xmlns:%s="%s"z xmlns="%s":z %s=%s>)rR   r   r   r-   r   r.   r   r/   rS   r7   rL   rE   r3   r|   r   )r    r<   r=   rY   rR   rN   valuer#   r#   r$   r     s0    



z#RDFXMLHandler.literal_element_startc             C   s   | j  jt|7  _d S )N)rR   r3   r   )r    r8   r#   r#   r$   r     s    z"RDFXMLHandler.literal_element_charc             C   sb   |d r:| j |d  }|r,d||d f }qFd|d  }nd|d  }| j j| jj| 7  _d S )Nr   z</%s:%s>r%   z</%s>)rE   rS   r3   rR   )r    r<   r=   rN   r/   r#   r#   r$   r     s    z!RDFXMLHandler.literal_element_endN)#r(   r)   r*   r   r;   rG   rH   rP   rQ   r\   r^   r`   ra   rc   rh   rm   rp   propertyrR   rq   r   rt   rS   ry   r   r@   r   r   r   r   r   r   r   r   r   r#   r#   r#   r$   r   _   s>   R
yc             C   sf   t  }y|jdd W n tk
r*   Y nX |jtjd t|}|j|  |j| |j	t
  |S )Nrz   z$http://www.w3.org/XML/1998/namespacer%   )r   Zstart_namespace_declAttributeError
setFeaturer   feature_namespacesr   rG   setContentHandlersetErrorHandlerr   )rb   r9   parserZrdfxmlr#   r#   r$   r   #  s    

c               @   s   e Zd Zdd Zdd ZdS )r   c             C   s   d S )Nr#   )r    r#   r#   r$   r   7  s    zRDFXMLParser.__init__c             K   s@   t ||| _| jj }|jdd }|d k	r0||_| jj| d S )Nr:   )r   _parsergetContentHandlerrT   r:   parse)r    sourceZsinkargsZcontent_handlerr:   r#   r#   r$   r   :  s    
zRDFXMLParser.parseN)r(   r)   r*   r   r   r#   r#   r#   r$   r   5  s   N)6__doc__Zxml.saxr   Zxml.sax.handlerr   xml.sax.saxutilsr   r   r   Zsix.moves.urllib.parser   r   Zrdflib.namespacer	   r
   Zrdflib.termr   r   r   Zrdflib.exceptionsr   r   Zrdflib.parserr   __all__r&   r   r   r   r   r   r   r   r6   ZCORE_SYNTAX_TERMSr   r   ZSYNTAX_TERMSZ	OLD_TERMSr   r   r   r   r   r~   rU   rX   r   r3   r   ContentHandlerr   r   r   r#   r#   r#   r$   <module>   sP   

   G