3
d              	   @   s  d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlZddlZddl	Z	ddl
Z
ddlmZ ddlmZ ddlmZmZmZmZmZmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddlm!Z! ddlm"Z" dddddddddg	Z#ddl$m%Z% diddZ&dd Z'dd Z(dd Z)dZ*d Z+d!Z,d"Z-e+e,e-fZ.e*e+e,e-fZ/dZ0d Z1d!Z2d#Z3d$Z4d"Z5d%Z6d&Z7e7d' Z8e7d( Z9e7d) Z:d*Z;d+Z<d,Z=e=d- Z>e7d. Z?d/Z@e<ZAd0ZBe0eAd1 fZCe0eAd2 fZDe0eAd3 fZEe0eAd4 fZFe0eAd5 fZGe0eAd6 fZHdaId7d ZJdaKd8d ZLd9ZMd:ZNd;d< ZOd=d> ZPe9ZQe:ZRd?ZSe0e;fZTe0e>fZUd@ZVedA ZWedB ZXedC ZYedD ZZedE Z[dZ\dFZ]e]dG Z^e]dH Z_d+Z`dIZadJZbdKdL Zce"rdMdL ZcejddNZeejddOZfd?ZgejddPZhejddQZiejddRZjejddSZkejddSZlejddTZmejddUZnejddVZoejddWZpejddXZqG dYdZ dZZrG d[d desZtG d\d] d]euZvejdd^ZwG d_d` d`euZxdad ZyG dbd de%ZzG dcd dezZ{ddde Z|dfdg Z}e~dhkre}  dS )ja  
notation3.py - Standalone Notation3 Parser
Derived from CWM, the Closed World Machine

Authors of the original suite:

* Dan Connolly <@@>
* Tim Berners-Lee <@@>
* Yosi Scharf <@@>
* Joseph M. Reagle Jr. <reagle@w3.org>
* Rich Salz <rsalz@zolera.com>

http://www.w3.org/2000/10/swap/notation3.py

Copyright 2000-2007, World Wide Web Consortium.
Copyright 2001, MIT.
Copyright 2001, Zolera Systems Inc.

License: W3C Software License
http://www.w3.org/Consortium/Legal/copyright-software

Modified by Sean B. Palmer
Copyright 2007, Sean B. Palmer.

Modified to work with rdflib by Gunnar Aastrand Grimnes
Copyright 2010, Gunnar A. Grimnes

    )absolute_import)division)print_functionN)Decimal)uuid4)URIRefBNodeLiteralVariable_XSD_PFX
_unique_id)QuotedGraphConjunctiveGraphGraph)b)binary_type)	long_type)string_types)	text_type)unichr)narrow_build	BadSyntaxN3ParserTurtleParser
splitFragPjoinbaserunNamespace	uniqueURIhexify)Parserc             C   s6   | j d}|dkr*| d| | |d fS | dfS dS )zsplit a URI reference before the fragment

    Punctuation is kept.

    e.g.

    >>> splitFragP("abc#def")
    ('abc', '#def')

    >>> splitFragP("abcdef")
    ('abcdef', '')

    #r   N )rfind)Zurirefpuncti r&   B/tmp/pip-build-7vycvbft/rdflib/rdflib/plugins/parsers/notation3.pyr   A   s    
c       
      C   s  |j d}|j d}|dkr0|dk s,||k r0|S | j d}|dksNtd|  t|\}}|sf| | S | |d |d  dkrtd| |f | |d |d  d	kr| j d|d }n|d }|dk rt| }| d } |d
d d	kr| d
|d  | S |d
d dkr| d
| | S | jd}x|d
d dkrH|dd
 }|dkrXd}nX|d
d dkst|dkr|dd
 }| jd||}	|	dkr| d
|	d  } |	}nP q*W | d
|d  | | S )a  join an absolute URI and URI reference
    (non-ascii characters are supported/doctested;
    haven't checked the details of the IRI spec though)

    ``here`` is assumed to be absolute.
    ``there`` is URI reference.

    >>> join('http://example/x/y/z', '../abc')
    'http://example/x/abc'

    Raise ValueError if there uses relative path
    syntax but here has no hierarchical path.

    >>> join('mid:foo@example', '../foo') # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
        raise ValueError(here)
    ValueError: Base <mid:foo@example> has no slash
    after colon - with relative '../foo'.

    >>> join('http://example/x/y/z', '')
    'http://example/x/y/z'

    >>> join('mid:foo@example', '#foo')
    'mid:foo@example#foo'

    We grok IRIs

    >>> len(u'Andr\xe9')
    5

    >>> join('http://example.org/', u'#Andr\xe9')
    u'http://example.org/#Andr\xe9'
    /:r   zBase uri '%s' is not absolute      z8Base <%s> has no slash after colon - with relative '%s'.   z//Nz./.r"   z../z..)findAssertionErrorr   
ValueErrorlenr#   )
hereZthereZslashlZcolonlZbcolonlpathfragbpathZslashrr%   r&   r&   r'   r   X   sL    &







c               C   s   dt tj  d S )a  The base URI for this process - the Web equiv of cwd

    Relative or abolute unix-standard filenames parsed relative to
    this yeild the URI of the file.
    If we had a reliable way of getting a computer name,
    we should put it in the hostname just to prevent ambiguity

    zfile://r(   )	_fixslashosgetcwdr&   r&   r&   r'   r      s    
c             C   s4   | j dd} | d dkr0| d dkr0| dd } | S )z8 Fix windowslike filename to unixlike - (#ifdef WINDOWS)\r(   r   r*   r)   r+   N)replace)sr&   r&   r'   r6      s    r6   r*   r+   r,            z#http://www.w3.org/2000/10/swap/log#isforSomeforAllz/http://www.w3.org/1999/02/22-rdf-syntax-ns#typez+http://www.w3.org/1999/02/22-rdf-syntax-ns#zhttp://www.w3.org/2002/07/owl#ZsameAsZparsesToz$http://www.w3.org/TR/REC-rdf-syntax/z&http://www.w3.org/2000/10/swap/log.n3#firstrestlinilListEmptyc               C   s   t dkrtt t d a t S )z:Return a URI suitable as a namespace for run-local objectsNr!   )runNamespaceValuer   r   r   r&   r&   r&   r'   r      s    c               C   s   t d7 a t d tt  S )zA unique URIr*   Zu_)nextur   strr&   r&   r&   r'   r     s    F2   c              O   s   d S )Nr&   )argskargsr&   r&   r'   BecauseOfData  s    rN   c              O   s   d S )Nr&   )rL   rM   r&   r&   r'   becauseSubexpression  s    rO   r!   z*http://www.w3.org/2000/10/swap/log#impliesbooleandecimaldoublefloatintegerz	
 !"#$&'()*,+/;<=>?@[\]^`{|}~r-   r)   ZABCDEFabcdef0123456789z(_~.-!$&'()*+,;=/?#@%)c          
   C   s8   yt t| jddS    td| jd Y nX d S )Nr*      zInvalid unicode code point: )r   intgroup	Exception)mr&   r&   r'   unicodeExpand;  s    rZ   c             C   sP   yt t| jddS  tk
rJ   tjd| jd  tj| jddS X d S )Nr*   rU   zEncountered a unicode char > 0xFFFF in a narrow python build. Trying to degrade gracefully, but this can cause problems later when working with the string:
%sr   Zunicode_escape)r   rV   rW   r0   warningswarncodecsdecode)rY   r&   r&   r'   rZ   B  s    z\\u([0-9a-fA-F]{4})z\\U([0-9a-fA-F]{8})z[ \t]*(#[^\n]*)?\r?\nz[ \t]*(#[^\n]*)?$z[ \t]*z[-+]?[0-9]+z[-+]?[0-9]*\.[0-9]+z9[-+]?(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)(?:e|E)[-+]?[0-9]+z[0-9]+z[\\\r\n\"\']z[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*c               @   sd  e Zd ZdXddZdd Zdd	 Zd
d Zdd Zdd Zdd Z	dYddZ
dd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- ZdZd.d/Zd0d1 Zd2d3 Zd[d4d5Zd6d7 Zd8d9 Zd:d; Zd<d= Zd>d? Z d@dA Z!dBdC Z"dDdE Z#dFdG Z$dHdI Z%dJdK Z&dLdM Z'dNdO Z(dPdQ Z)dRdS Z*dTdU Z+dVdW Z,dS )\
SinkParserNr"   Fc             C   sf  i | _ |dkr0d|ks"td| |d | j d< || _|rD|j| || _d| _d| _|| _dddd	d
dddg| _d| _	i | _
i | _i | _|| _|| _|rdnd| _d| _trt|j|| jd| _|r|| _n|r|| _nd| _| j sd| jkst| js| jr| jd | _nt | _|dkrN| jrB|j|d | _n
|j | _n|| _| j| _d| _dS )zf note: namespace names should *not* end in  # ;
        the  # will get added during qname processing r"   r)   zDocument URI not absolute: <%s>r!   r   athisbindhasr?   oftruefalse"'N)Zbecausez#_gz	#_formula)rg   rh   )rg   )	_bindingsr/   _storeZsetGenPrefix_thisDoclinesstartOfLine
_genPrefixkeywordskeywordsSet_anonymousNodes
_variables_parentVariablesZ_reasonturtlestring_delimiters_reason2trackingrN   	newSymbol_baseURIr   
newFormula_formula_context_parentContext)selfstoreZopenFormulaZthisDocbaseURIZ	genPrefixwhyrt   r&   r&   r'   __init__g  sR    


zSinkParser.__init__c             C   s   d| j | j|| j d f S )a(  String generated from position in file

        This is for repeatability when refering people to bnodes in a document.
        This has diagnostic uses less formally, as it should point one to which
        bnode the arbitrary identifier actually is. It gives the
        line and character number of the '[' charcacter or path character
        which introduced the blank node. The first blank node is boringly
        _L1C1. It used to be used only for tracking, but for tests in general
        it makes the canonical ordering of bnodes repeatable.z	%s_L%iC%ir*   )rn   rl   rm   )r~   r%   r&   r&   r'   r2     s    
zSinkParser.herec             C   s   | j S )N)r{   )r~   r&   r&   r'   formula  s    zSinkParser.formulac             C   s   | j |j S )N)loadBufread)r~   streamr&   r&   r'   
loadStream  s    zSinkParser.loadStreamc             C   s   | j   | j| | j S )z1Parses a buffer and returns its top level formula)startDocfeedendDoc)r~   bufr&   r&   r'   r     s    
zSinkParser.loadBufc             C   s   t |tsB|jd}t|dkrF|d tjjdkrF|dd }n|}d}xF|dkr| j||}|dk rldS | j||}|dk rL| j||d qLW dS )a  Feed an octet stream tothe parser

        if BadSyntax is raised, the string
        passed in the exception object is the
        remainder after any statements have been parsed.
        So if there is more data to feed to the
        parser, it should be straightforward to recover.zutf-8r   r*   Nzexpected directive or statement)	
isinstancer   r^   r1   r]   BOM_UTF8	skipSpacedirectiveOrStatementr   )r~   octetsr;   r%   jr&   r&   r'   r     s    	

 
zSinkParser.feedc             C   sz   | j ||}|dk r|S | jr6| j||}|dkr6|S | j||}|dkrV| j||S | j||}|dkrv| j||S |S )Nr   )r   rt   sparqlDirective	directivecheckDot	statement)r~   argstrhr%   r   r&   r&   r'   r     s    zSinkParser.directiveOrStatementc             C   s   |d t kst|||d  dkr.|d }n|| jkr<dS |||t|  |krh||t|  tks|r||t|  dkr|t| }|S dS dS )zCheck for keyword.  Space must have been stripped on entry and
        we must not be at end of file.

        if colon, then keyword followed by colon is ok
        (@prefix:<blah> is ok, rdf:type shortcut a must be followed by ws)
        r   r*   @r)   Nr   )_notNameCharsr/   ro   r1   _notKeywordsChars)r~   tokr   r%   colonr&   r&   r'   r     s    

zSinkParser.tokc             C   s\   |d t kst|||t|  j |j krT||t|  tkrT|t| }|S dS dS )zCheck for SPARQL keyword.  Space must have been stripped on entry
        and we must not be at end of file.
        Case insensitive and not preceeded by @
        r   r*   Nr   )r   r/   r1   lower_notQNameChars)r~   r   r   r%   r&   r&   r'   	sparqlTok  s     zSinkParser.sparqlTokc             C   s6  | j ||}|dk r|S g }| jd||}|dkr@| j||d | jd||}|dkr| jrj| j||d | j|||| j}|dk r| j||d | j|d d   |S | jd||}|dkr:| jr| j||d | j|||| j}|dk r| j||d	 x8|D ]0}|| jks|| j	kr| j
j|| j|< qW |S | jd
||}|dkr| jrh| j||d | j|||| j}|dk r| j||d x|D ]}| j
j| qW |S | jd||dd}|dkrg }| j|||}|dk r| j||d | j|||}|dk r| j||d | j|d }| jrBt| j|}n d|krb| j||d| d  d|kspt|| j|d d < | j|d d t| |S | jd||}|dkr2g }| j|||}|dk r| j||d | j|d }| jrt| j|}n| j||d| d  d|ks(t|| _|S dS )Nr   rb   z%keyword bind is obsolete: use @prefixro   z%Found 'keywords' when in Turtle mode.z/'@keywords' needs comma separated list of wordsrA   z#Found 'forAll' when in Turtle mode.zBad variable list after @forAllr@   z$Found 'forSome' when in Turtle mode.z Bad variable list after @forSomeprefixT)r   zexpected qname after @prefixz'expected <uriref> after @prefix _qname_r*   r)   zWith no base URI, cannot use zrelative URI in @prefix <>r   zexpected <uri> after @base z&With no previous base URI, cannot use zrelative URI in @base  <z6With no base URI, cannot use relative URI in @prefix <z>With no previous base URI, cannot use relative URI in @base  <r   )r   r   r   rt   commaSeparatedListbareWordsetKeywordsuri_ref2rr   rs   r|   newUniversaldeclareExistentialqnameuriOfry   r   r/   ri   rb   r   )r~   r   r%   r   resxtnsr&   r&   r'   r     s    










zSinkParser.directivec             C   s  | j ||}|dk r|S | jd||}|dkrg }| j|||}|dk rV| j||d | j|||}|dk rz| j||d | j|d }| jrt| j|}nd|kr| j||d| d	  d|kst|| j	|d d < | j
|d d t| |S | jd
||}|dkrg }| j|||}|dk r6| j||d | j|d }| jrZt| j|}n| j||d| d	  d|ks~t|| _|S dS )za
        turtle and trig support BASE/PREFIX without @ and without
        terminating .
        r   PREFIXzexpected qname after @prefixz'expected <uriref> after @prefix _qname_r*   r)   zWith no base URI, cannot use zrelative URI in @prefix <r   ZBASEzexpected <uri> after @base z&With no previous base URI, cannot use zrelative URI in @base  <z6With no base URI, cannot use relative URI in @prefix <z>With no previous base URI, cannot use relative URI in @base  <r   )r   r   r   r   r   r   ry   r   r/   ri   rb   r   )r~   r   r%   r   r   r   r&   r&   r'   r     sP    

zSinkParser.sparqlDirectivec             C   s:   t |tstd|dkr(| jj| n| jj|| d S )Nz&Any unicode must be %x-encoded alreadyr"   )r   r   r/   rj   setDefaultNamespacerb   )r~   qnurir&   r&   r'   rb     s
    zSinkParser.bindc             C   s    |dkrd| _ n|| _d| _ dS )zTakes a list of stringsNr   r*   )rp   ro   )r~   kr&   r&   r'   r     s    zSinkParser.setKeywordsc             C   s   | j j| j d S )N)rj   r   r{   )r~   r&   r&   r'   r     s    zSinkParser.startDocc             C   s   | j j| j | jS )z8Signal end of document and stop parsing. returns formula)rj   r   r{   )r~   r&   r&   r'   r     s    zSinkParser.endDocc             C   s   | j j|| jd d S )N)r   )rj   makeStatementrv   )r~   	quadrupler&   r&   r'   r     s    zSinkParser.makeStatementc             C   sJ   g }| j |||}|dk r|S | j|||d }|dk rF| j||d |S )Nr   zexpected propertylist)objectproperty_listr   )r~   r   r%   rr   r&   r&   r'   r     s    

zSinkParser.statementc             C   s   | j |||S )N)item)r~   r   r%   r   r&   r&   r'   subject  s    zSinkParser.subjectc             C   s  | j ||}|dk r|S g }| jd||}|dkr| jrF| j||d | j|||}|dk rj| j||d |jd|d f |S | jd||}|dkr2| jr| j||d | j|||}|dk r| j||d | j ||}|dk r| j||d	 |}| jd
||}|dk r| j||d |jd|d f |S | jd||}|dkr\|jdtf |S |||d  dkr| jr| j||d |jd| jjt	d f |d S |||d  dkr(| jr| j||d ||d |d  dkr|jd| jjt	d f |d S |jdt
f |d S |||d  dkrn| jrT| j||d |jdt	d f |d S | j|||}|dkr|jd|d f |S |||d  dks|||d  dkr| j||d dS )z| has _prop_
        is _prop_ of
        a
        =
        _prop_
        >- prop ->
        <- prop -<
        _operator_r   rc   z"Found 'has' keyword in Turtle modezexpected property after 'has'z->r?   z!Found 'is' keyword in Turtle modezexpected <property> after 'is'z/End of file found, expected property after 'is'rd   zexpected 'of' after 'is' <prop>z<-r`   r+   z<=zFound '<=' in Turtle mode. Zimpliesr*   =zFound '=' in Turtle moder   z:=zFound ':=' in Turtle modeZbecomesz>-z>- ... -> syntax is obsolete.r   )r   r   rt   r   propappendRDF_typerj   rx   Logic_NSDAML_sameAs)r~   r   r%   r   r   r   r&   r&   r'   verb  s    




,zSinkParser.verbc             C   s   | j |||S )N)r   )r~   r   r%   r   r&   r&   r'   r   A  s    zSinkParser.propc             C   s   | j |||S )N)r3   )r~   r   r%   r   r&   r&   r'   r   D  s    zSinkParser.itemc             C   s   | j j| j|| jdS )N)r   )rj   newBlankNoder|   rv   )r~   r   r&   r&   r'   	blankNodeG  s    zSinkParser.blankNodec       	      C   s   | j |||}|dk r|S x|||d  dkr|||d  }|j }| j| j|d}| j||d |}|dk r| j||d |j }|dkr| j| j|||f n| j| j|||f |j| qW |S )z#Parse the path production.
        r   r*   z!^)r   z"EOF found in middle of path syntax^)	nodeOrLiteralpopr   r2   noder   r   r|   r   )	r~   r   r%   r   r   chsubjobjpredr&   r&   r'   r3   J  s"    zSinkParser.pathc             C   s<   | j j|d}|dk	r|S | jj| j| jd}|| j |< |S )z?Remember or generate a term for one of these _: anonymous nodesN)r   )rq   getrj   r   r|   rv   )r~   lnZtermr&   r&   r'   anonymousNodea  s    
zSinkParser.anonymousNodec             C   s  |}| j ||}|dk r|S |}|||d  }|dkr| j|}| j ||d }|dk rj| j||d |||d  dkr@| jr| j||d |d }g }	| j|||	}|dkr2|	d }t|	dkrx |	D ]}
| j| jt||
f qW | j ||}|dk r| j||d |||d  dkr@|d }n| j||d	 |d
krV| j	|d}| j
|||}|dk r|| j||d | j ||}|dk r| j||d |||d  dkr| j||d |j| |d S | j r|dkr||d |d  }|dkr |d7 }|d }g }d}x| j ||}|dk rF| j||d |||d  dkrf|d }P |s|||d  dkr|d7 }n| j||d nd}g }| j|||}|dk r| j||d |j| jj|d  q"W |j| jj|| j |S |d }| j}| j| _| j}| j}| j| _i | _| jj | _| j}t| _|d
kr\| jj }|| _xn| j ||}|dk r| j||d |||d  dkr|d }P | j||}|dk rd| j||d qdW || _| j| _|| _| j| _|| _|| _|j|j  |S |dkr| jj}||d |d  }|dkrL| jj}|d7 }|d }g }x| j ||}|dk r~| j||d |||d  dkr|d }P g }| j|||}|dk r| j||d  |j| jj|d  qZW |j||| j |S | jd!||}|dkr | j||d& | jd$||}|dkrF|jd |S | jd%||}|dkrl|jd |S |d
kr| j|||}|dkr|S d'S )(zParse the <node> production.
        Space is now skipped once at the beginning
        instead of in multipe calls to self.skipSpace().
        r   r*   [zEOF after '['r   z(Found '[=' or '[ =' when in turtle mode.z'EOF when objectList expected after [ = ;zobjectList expected after [= N)r   zproperty_list expectedz,EOF when ']' expected after [ <propertyList>]z']' expected{r+   $Tzneeded '$}', found end.z$},zexpected: ','Fzexpected item in set or '$}'zneeded '}', found end.}zexpected statement or '}'(zneeded ')', found end.)zexpected item in list or ')'ra   z'Keyword 'this' was ancient N3. Now use z@forSome and @forAll keywords.re   rf   zEKeyword 'this' was ancient N3. Now use @forSome and @forAll keywords.r   )r   r2   r   rt   
objectListr1   r   r|   r   r   r   r   r   rj   internnewSetr}   rq   rs   rr   copyrv   rO   rz   r   closenewListr   r   )r~   r   r%   r   ZsubjectAlreadyr   r   r   ZbnodeIDobjsr   Zch2rF   Z	first_runr   ZoldParentContextZparentAnonymousNodesZgrandParentVariablesZreason2Z
thing_typer&   r&   r'   r   j  s2   


























 





zSinkParser.nodec             C   s  xx<| j ||}|dk r(| j||d || dkr6P |d }qW |||d  dkr| jrj| j||d |d }g }| j||||}|dk r| j||d |}q|}g }| j|||}|dkr|S g }| j|||}|dk r| j||d	 xL|D ]D}|d \}	}
|	d
kr"| j| j|
||f q| j| j|
||f qW | j ||}|dk r^| j||d |||d  dkrx|S |d }qW dS )zUParse property list
        Leaves the terminating punctuation in the buffer
        r   z-EOF found when expected verb in property listr   r*   r+   z:-zFound in ':-' in Turtle modez!bad {} or () or [] node after :- zobjectList expectedz->zEOF found in list of objectsN)r   r   rt   r   r   r   r   r|   )r~   r   r%   r   r   r   vr   r   Zdirasymr&   r&   r'   r   '  sT     


zSinkParser.property_listc             C   s   | j ||}|dk r"| j||d || dkr2|S ||||}|dk rJdS xj| j ||}|dk rd|S |||d  }|dkr|dkrd	S |S |||d |}|dk rL| j||d qLW dS )
zereturn value: -1 bad syntax; >1 new position in argstr
        res has things found appended
        r   z"EOF found expecting comma sep listr-   r*   r   zbad list contentNr   r   )r   r   )r~   r   r   r   whatr%   r   r&   r&   r'   r   Y  s,    zSinkParser.commaSeparatedListc             C   s|   | j |||}|dk rdS x\| j||}|dk r>| j||d |||d  dkrV|S | j ||d |}|dk r|S qW d S )Nr   r*   zEOF found after objectr   r   )r   r   r   )r~   r   r%   r   r   r&   r&   r'   r   u  s    zSinkParser.objectListc             C   sv   | j ||}|dk r|S |||d  dkr4|d S |||d  dkrL|S |||d  dkrd|S | j||d d S )Nr   r*   r-   r   r   z.expected '.' or '}' or ']' at end of statement)r   r   )r~   r   r%   r   r&   r&   r'   r     s    zSinkParser.checkDotc             C   s  g }| j |||}|dkr|d \}}|dkrFds:td| jt }nty| j| }W nd tk
r   |dkr|j| j| |S | j r|dkrt	| jpdd}n| j
||d|  Y nX | jj|| }	|	| jkr|j| j|	  n
|j|	 |S | j||}|dk rdS || d	krNg }
| j|||
}|dkrJ|j|
d  |S dS || d
kr^|d }|}x|t|k rL|| dkr@||| }tjt|}tjt|}| jrt	| j|}nd|kstd||d | dkr|dd dk r|d }| jj|}	|	| jkr.|j| j|	  n
|j|	 |d S |d }qjW | j
||d nv| jrg }
| j|||
}|dk rdS |
d | jkr| j
||d|
d   |j| jj| jd |
d   |S dS dS )zGenerate uri from n3 representation.

        Note that the RDF convention of directly concatenating
        NS and local name is now used though I prefer inserting a '#'
        to make the namesapces look more like what XML folks expect.
        r   Nz	not used?_r"   r!   zPrefix "%s:" not boundr*   ?<r   r)   z0With no base URI, cannot deal with relative URIszunterminated URI referencezKeyword "%s" not allowed here.r   r   r   r   r   )r   r/   ry   
ADDED_HASHri   KeyErrorr   r   rt   r   r   rj   rx   rr   r   variabler1   unicodeEscape8subrZ   unicodeEscape4rp   r   ro   )r~   r   r%   r   r   r   pfxr   r   Zsymbr   stZurefr&   r&   r'   r     s    



*

 zSinkParser.uri_ref2c             C   sn   x4t j||}|dkrP | jd | _|j }|| _qW tj||}|dk	rR|j }tj||}|dk	rjdS |S )zjSkip white space, newlines and comments.
        return -1 if EOF, else position of first non-ws characterNr*   r   )eolmatchrl   endrm   wseof)r~   r   r%   rY   r&   r&   r'   r     s    
zSinkParser.skipSpacec             C   s8  | j ||}|dk rd	S |||d  dkr0d
S |d }|}|| dkr^| j||d||   x$|t|k r|| tkr|d }q`W | jdkr| jj| jd |||  }|| jkr| j	j
|| jd| j|< |j| j|  |S | jj| jd |||  }|| jkr$| jj
|| jd| j|< |j| j|  |S )z$     ?abc -> variable(:abc)
        r   r*   r   z0123456789-z"Varible name can't start with '%s'Nr!   )r   r   r   )r   r   r1   r   r}   rj   rx   ry   rr   r|   r   rv   r   rs   )r~   r   r%   r   r   ZvarURIr&   r&   r'   r     s2    

zSinkParser.variablec             C   st   | j ||}|dk rdS || dks0|| tkr4dS |}x$|t|k r\|| tkr\|d }q:W |j|||  |S )z     abc -> :abc
        r   r*   z0123456789-r   r   )r   r   r1   r   )r~   r   r%   r   r   r&   r&   r'   r     s    zSinkParser.bareWordc       	      C   sL  | j ||}|dk rdS || }|dkr,dS |tkr|}|d }x4|t|k rt|| }|tkrp|| }|d }qBP qBW ||d  dkr|dd }|sdS |d8 }nd}|t|k o|| dkr|}|dkrt}nt}|d }d	}d}x|t|k r|| }| r |d
kr d}|d7 }q|s0||kr|rZ|tkrt| j| j||d| nF|dkr||d  tks||d  tkrt| j| j||d| || }|d }d	}qP qW |rt| j| j||d||d  dkr|dd }|sdS |d8 }|j	||f |S |rD| j
rD|| jkrD|j	d|f |S dS dS )z
        xyz:def -> ('xyz', 'def')
        If not in keywords and keywordsSet: def -> ('', 'def')
        :def -> ('', 'def')
        r   r*   z0123456789-+.r-   Nr"   r)   r   Fr9   Tzillegal escape %r+   zillegal hex escape zqname cannot end with \r   r   r   r   r   r   r   )r   r   r1   r   escapeCharsr   rk   linehexCharsr   rp   ro   )	r~   r   r%   r   cr   r   ZallowedCharsZ	lastslashr&   r&   r'   r   ,  s|    
 



$
 zSinkParser.qnamec             C   s   | j |||}|dkr|S | j||}|dk r2dS |}|| | jkr|||d  || d krn|| d }n|| }|t| }| j|||\}}|j| jj| |S dS d S )Nr   r*   r,   r   r   )r   r   ru   r1   strconstr   rj   
newLiteral)r~   r   r%   r   r   delimr;   r&   r&   r'   r     s     zSinkParser.objectc             C   s   | j |||}| j}|dkr |S | j||}|dk r8d
S |}|| }|dkrtj||}|r~|j }|jt|||  |S tj||}|r|j }|jt	|||  |S t
j||}|r|j }|jt|||  |S || | jkr|||d  || d kr|| d }n|| }|t| }d }	| j|||\}}
d }|||d  dkrtj||d }|d krt| j|||d|j }||d | }|}|||d  d	krg }| j||d |}|d }	|j| jj|
|	| |S dS d S )Nr   r*   z-+0987654321.r,   r   z#Bad language code syntax on string zliteral, after @r+   z^^r   z3Bad language code syntax on string literal, after @r   )r   rl   r   exponent_syntaxr   r   r   rS   decimal_syntaxr   integer_syntaxr   ru   r1   r   langcoder   rk   r   rj   r   )r~   r   r%   r   r   	startliner   rY   r   dtr;   langZres2r&   r&   r'   r     sd    

 zSinkParser.nodeOrLiteralc             C   s   t |tr|d S |S )Nr*   )r   tuple)r~   r   r&   r&   r'   r     s    
zSinkParser.uriOfc             C   s<  |d }|d |d |d |d f\}}}}|}	d}
| j }x|	t|k r(||	 |kr
||krr|	d }||
fS ||kr
||	|	d  |kr|	d }|
| }
||
fS ||	|	d  |kr|	d }|
| }
||
fS ||	|	d  |kr|	d }||
fS |	d }	|
| }
q>tj||	}|sDtd||	d	 |	 ||	|	d	  f |j }y|
||	|  }
W np tk
r   d}x&||	| D ]}|d
t|  }qW tj	 d j
 }t| j|||	dd||f  Y nX || }||kr|}	q>q>|dkr||kr|
| }
|d }	q>q>|dkr^||kr:t| j|||d| j d | _ |
| }
|d }	|	| _q>|dkr>|d }	||	|	d  }|st| j|||ddj|}|dkrd| }|
| }
|	d }	q>|dkr| j||	d |\}	}|
| }
q>|dkr| j||	d |\}	}|
| }
q>| j||d q>W | j||d dS )zRparse an N3 string constant delimited by delim.
        return index, val
        r   r+   r,         r"   r*   z&Quote expected in string at ^ in %s^%s   z %02xz"Unicode error appending charactersz %s to string, because
	%srg   rh   z
znewline found in string literalr9   zunterminated string literal (2)z
abfrtvn\"'z
	
\"'uUz
bad escapezunterminated string literalN)rg   rh   )rl   r1   interestingsearchr/   startUnicodeErrorordsysexc_info__str__r   rk   rm   r.   uEscapeUEscape)r~   r   r%   r   Zdelim1Zdelim2Zdelim3Zdelim4Zdelim5r   ustrr   rY   errr   Zstreasonr   r   Zuchr&   r&   r'   r     s    $

$













zSinkParser.strconstc             C   s~   t ||| k r"t| j|||dy(|| |jtd| ||||   fS    t| j|||d||||   Y nX d S )Nzunterminated string literal(3)r9   zbad string literal hex escape: )r1   r   rk   r   rZ   )r~   r   r%   r   regnr   r&   r&   r'   _unicodeEscapeD  s    
(
zSinkParser._unicodeEscapec             C   s   | j |||tddS )Nr  r  )r  r   )r~   r   r%   r   r&   r&   r'   r  P  s    zSinkParser.uEscapec             C   s   | j |||tddS )N   r  )r  r   )r~   r   r%   r   r&   r&   r'   r  S  s    zSinkParser.UEscapec             C   s   t | j| j|||d S )N)r   rk   rl   )r~   r   r%   msgr&   r&   r'   r   V  s    zSinkParser.BadSyntax)Nr"   Nr"   NF)F)N)N)-__name__
__module____qualname__r   r2   r   r   r   r   r   r   r   r   r   rb   r   r   r   r   r   r   r   r   r   r   r3   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r   r&   r&   r&   r'   r_   f  sX    
@
`9Z
	
 >2W#Y?ar_   c               @   s(   e Zd Zdd Zdd Zedd ZdS )r   c             C   s(   |j d| _|| _|| _|| _|| _d S )Nzutf-8)encode_str_i_whyrl   _uri)r~   r   rl   r   r%   r   r&   r&   r'   r   ^  s    zBadSyntax.__init__c          
   C   sx   | j }| j}d}|dkr&d}|d }nd}t|| dkr@d}nd}d| jd | j| j|||| |||d  |f S )Nr   <   z...r"   z8at line %i of <%s>:
Bad syntax (%s) at ^ in:
"%s%s^%s%s"r*   )r  r  r1   rl   r  r  )r~   r   r%   r   prepostr&   r&   r'   r  f  s    
zBadSyntax.__str__c             C   s   t | S )N)rJ   )r~   r&   r&   r'   messagex  s    zBadSyntax.messageN)r  r  r  r   r  propertyr#  r&   r&   r&   r'   r   ]  s   c               @   sL   e Zd ZdZdd Zdd Zdd Zdd	d
ZdddZdd Z	dd Z
dS )Formular   c             C   sJ   t  j| _d| _t jd7  _tj| _i | _i | _t|j	| j
 d| _d S )Nr   r*   )r   
identifier)r   hexuuidcounterr%  numberexistentialsZ
universalsr   r   idquotedgraph)r~   parentr&   r&   r'   r     s    
zFormula.__init__c             C   s
   d| j  S )Nz_:Formula%s)r*  )r~   r&   r&   r'   r    s    zFormula.__str__c             C   s   t d| j S )Nz_:Formula%s)r   r*  )r~   r&   r&   r'   r,    s    z
Formula.idNc             C   sJ   |d kr,|  j d7  _ td| j| j f }nt|jdj jdd}|S )Nr*   zf%sb%sr!   r   r   )r)  r   r(  splitr   r:   )r~   r   r   bnr&   r&   r'   r     s
    zFormula.newBlankNodec             C   s   t |jdj S )Nr!   )r
   r/  r   )r~   r   r   r&   r&   r'   r     s    zFormula.newUniversalc             C   s   | j  | j|< d S )N)r   r+  )r~   r   r&   r&   r'   r     s    zFormula.declareExistentialc             C   s   | j S )N)r-  )r~   r&   r&   r'   r     s    zFormula.close)NN)N)r  r  r  r*  r   r  r,  r   r   r   r   r&   r&   r&   r'   r%    s   

r%  z([\x80-\xff])c               @   s   e Zd Zdd Zdd Zdd Zdd Zd d
dZdd Zdd Z	dd Z
dd Zd!ddZdd Zdd Zdd Zdd Zdd Zd	S )"RDFSinkc             C   s   d | _ d| _|| _d S )Nr   )rootFormular)  graph)r~   r3  r&   r&   r'   r     s    zRDFSink.__init__c             C   s   | j jjstt| j }|S )N)r3  r   formula_awarer/   r%  )r~   fr&   r&   r'   rz     s    
zRDFSink.newFormulac             C   s   t | jj|S )N)r   r3  r   )r~   r&  r&   r&   r'   newGraph  s    zRDFSink.newGraphc             G   s   t |d S )Nr   )r   )r~   rL   r&   r&   r'   rx     s    zRDFSink.newSymbolNc             C   sn   t |tr|j|S t |ts&|d krH|  jd7  _tdt| j }n"tt|d jdj j	dd}|S )Nr*   r  r   r!   r   r   )
r   r%  r   r   r)  r   rJ   r/  r   r:   )r~   argr   r   r0  r&   r&   r'   r     s    

"zRDFSink.newBlankNodec             C   s    |rt ||dS t ||dS d S )N)datatype)r   )r	   )r~   r;   r   r   r&   r&   r'   r     s    zRDFSink.newLiteralc       
      C   s   | j d}|s|S | j d}| j d}| j| }}xF|d d D ]6}| j||||f | j|}	| j||||	f |	}qBW | j||||d f | j||||f |S )Nz.http://www.w3.org/1999/02/22-rdf-syntax-ns#nilz0http://www.w3.org/1999/02/22-rdf-syntax-ns#firstz/http://www.w3.org/1999/02/22-rdf-syntax-ns#restr*   r   r   )rx   r   r   )
r~   r  r5  rE   rB   rC   afr`   neZanr&   r&   r'   r     s"    
zRDFSink.newListc             G   s   t |S )N)set)r~   rL   r&   r&   r'   r     s    zRDFSink.newSetc             G   s   dj dd |D S )Nr)   c             s   s   | ]}t |V  qd S )N)repr).0r  r&   r&   r'   	<genexpr>  s    z.RDFSink.setDefaultNamespace.<locals>.<genexpr>)r   )r~   rL   r&   r&   r'   r     s    zRDFSink.setDefaultNamespacec             C   s   |\}}}}t |drtd| j||}| j||}| j||}|| jkr`| jj|||f n.t|tr~|jj|||f n|j|||f d S )Nr   zFormula used as predicate)	hasattrrX   	normaliser2  r3  addr   r%  r-  )r~   r   r   r5  pr;   or&   r&   r'   r     s    


zRDFSink.makeStatementc             C   s   t |trtt|d S t |tr<tt|j td}|S t |t	sPt |t
rdtt|td}|S t |trt|}|dkrd}t|td}|S t |trtt|td}|S t |tr||jkr|j| S |S )Nr*   )r8  z-00)r   r  r   r   boolr	   rJ   r   BOOLEAN_DATATYPErV   r   INTEGER_DATATYPEr   DECIMAL_DATATYPErS   DOUBLE_DATATYPEr%  r+  )r~   r5  r  r;   valuer&   r&   r'   r@    s*    






zRDFSink.normalisec             C   s   |S )Nr&   )r~   Z	somethingr&   r&   r'   r     s    zRDFSink.internc             C   s   d S )Nr&   )r~   r   r   r&   r&   r'   rb   "  s    zRDFSink.bindc             C   s
   || _ d S )N)r2  )r~   r   r&   r&   r'   r   %  s    zRDFSink.startDocc             C   s   d S )Nr&   )r~   r   r&   r&   r'   r   (  s    zRDFSink.endDoc)NNN)N)r  r  r  r   rz   r6  rx   r   r   r   r   r   r   r@  r   rb   r   r   r&   r&   r&   r'   r1    s   


#r1  c             C   sT   d}xF| D ]>}t |dks&t |dk r4dt | }ndt | }|| }q
W t|S )zUse URL encoding to return an ASCII string
    corresponding to the given UTF8 string

    >>> hexify("http://example/a b")
    %(b)s'http://example/a%%20b'

    r"   ~   !   z%%%02Xz%c)r  r   )r  r;   r   r&   r&   r'   r   3  s    	
c               @   s"   e Zd ZdZdd Zd	ddZdS )
r   zK
    An RDFLib parser for Turtle

    See http://www.w3.org/TR/turtle/
    c             C   s   d S )Nr&   )r~   r&   r&   r'   r   N  s    zTurtleParser.__init__utf-8Tc       
      C   sz   |dkrt d| t|}|j|j p2|j p2d}t|||d}|j|j  x"|jj	 D ]\}}	|j
||	 q^W d S )Nutf-8*N3/Turtle files are always utf-8 encoded, I was passed: %sr"   )r   rt   )NrN  )rO  rP  )rX   r1  Z
absolutizegetPublicIdgetSystemIdr_   r   getByteStreamri   itemsrb   )
r~   sourcer3  encodingrt   sinkr   rB  r   	namespacer&   r&   r'   parseQ  s     
zTurtleParser.parseN)rM  T)r  r  r  __doc__r   rY  r&   r&   r&   r'   r   F  s   c               @   s"   e Zd ZdZdd ZdddZdS )	r   z`
    An RDFLib parser for Notation3

    See http://www.w3.org/DesignIssues/Notation3.html

    c             C   s   d S )Nr&   )r~   r&   r&   r'   r   m  s    zN3Parser.__init__utf-8c             C   sJ   |j jst|j jstt|j d}||_|j|_tj| |||dd d S )N)r   F)rt   )	r   Zcontext_awarer/   r4  r   Zdefault_contextZnamespace_managerr   rY  )r~   rU  r3  rV  Z
conj_graphr&   r&   r'   rY  p  s    zN3Parser.parseN)r[  )r  r  r  rZ  r   rY  r&   r&   r&   r'   r   d  s   c              C   s   dd l } | j  d S )Nr   )doctesttestmod)r\  r&   r&   r'   _test~  s    r^  c              C   s   t  } t| }dtjjtj tjd  }t||d}|j	d |j
d< |j  ttjd d}|j }|j  |j| |j  x| jdD ]}t| qW d S )Nzfile://r*   )r   r!   r"   rb)NNN)r   r1  r7   r3   r   r8   r  argvr_   ry   ri   r   openr   r   r   r   Zquadsprint)grW  Zbase_urirB  r5  Zrdbytesr   r&   r&   r'   main  s    
rd  __main__)r   )rZ  
__future__r   r   r   typesr  r7   rer]   r[   rQ   r   r(  r   Zrdflib.termr   r   r	   r
   r   r   Zrdflib.graphr   r   r   sixr   r   Zrdflib.compatr   r   r   r   r   __all__Zrdflib.parserr    r   r   r   r6   ZCONTEXTZPREDZSUBJOBJZPARTSZALL4SYMBOLZFORMULALITERALZ
LITERAL_DTZLITERAL_LANGZ	ANONYMOUSZ
XMLLITERALr   ZNODE_MERGE_URIZ
forSomeSymZ	forAllSymZRDF_type_URIZ
RDF_NS_URIZOWL_NSZDAML_sameAs_URIZparsesTo_URIZRDF_specZList_NSZ_Old_Logic_NSZN3_firstZN3_restZN3_liZN3_nilZN3_ListZN3_EmptyrH   r   rI   r   rw   Zchatty_flagrN   rO   ZN3_forSome_URIZN3_forAll_URIr   r   r   ZLOG_implies_URIrF  rH  rI  ZFLOAT_DATATYPErG  Zoption_noregenr   r   r   Z_rdfnsr   r   rZ   compiler   r   ZN3CommentCharacterr   r   r   signed_integerr   r   r   Zdigitstringr  r   r_   SyntaxErrorr   r   r%  Zr_hibyter1  r   r   r   r^  rd  r  r&   r&   r&   r'   <module>   s    

`









          "'
 
