3
dN                 @   s.  d Z ddlmZ ddlmZ ddlmZ ddddd	d
gZddlmZmZm	Z	 ddl
mZmZ ddlmZ ddlmZ ddlmZ ddlmZ dd ZG dd deZG dd deZG dd deZG dd dZG dd deZdd Zdd Zd(ddZd d	 Ze Zd!d
 Z d"d# Z!d$d% Z"d&d' Z#dS ))a  
A collection of utilities for canonicalizing and inspecting graphs.

Among other things, they solve of the problem of deterministic bnode
comparisons.

Warning: the time to canonicalize bnodes may increase exponentially on
degenerate larger graphs. Use with care!

Example of comparing two graphs::

    >>> g1 = Graph().parse(format='n3', data='''
    ...     @prefix : <http://example.org/ns#> .
    ...     <http://example.org> :rel
    ...         <http://example.org/same>,
    ...         [ :label "Same" ],
    ...         <http://example.org/a>,
    ...         [ :label "A" ] .
    ... ''')
    >>> g2 = Graph().parse(format='n3', data='''
    ...     @prefix : <http://example.org/ns#> .
    ...     <http://example.org> :rel
    ...         <http://example.org/same>,
    ...         [ :label "Same" ],
    ...         <http://example.org/b>,
    ...         [ :label "B" ] .
    ... ''')
    >>>
    >>> iso1 = to_isomorphic(g1)
    >>> iso2 = to_isomorphic(g2)

These are not isomorphic::

    >>> iso1 == iso2
    False

Diff the two graphs::

    >>> in_both, in_first, in_second = graph_diff(iso1, iso2)

Present in both::

    >>> def dump_nt_sorted(g):
    ...     for l in sorted(g.serialize(format='nt').splitlines()):
    ...         if l: print(l.decode('ascii'))

    >>> dump_nt_sorted(in_both) #doctest: +SKIP
    <http://example.org>
        <http://example.org/ns#rel> <http://example.org/same> .
    <http://example.org>
        <http://example.org/ns#rel> _:cbcaabaaba17fecbc304a64f8edee4335e .
    _:cbcaabaaba17fecbc304a64f8edee4335e
        <http://example.org/ns#label> "Same" .

Only in first::

    >>> dump_nt_sorted(in_first) #doctest: +SKIP
    <http://example.org>
        <http://example.org/ns#rel> <http://example.org/a> .
    <http://example.org>
        <http://example.org/ns#rel> _:cb124e4c6da0579f810c0ffe4eff485bd9 .
    _:cb124e4c6da0579f810c0ffe4eff485bd9
        <http://example.org/ns#label> "A" .

Only in second::

    >>> dump_nt_sorted(in_second) #doctest: +SKIP
    <http://example.org>
        <http://example.org/ns#rel> <http://example.org/b> .
    <http://example.org>
        <http://example.org/ns#rel> _:cb558f30e21ddfc05ca53108348338ade8 .
    _:cb558f30e21ddfc05ca53108348338ade8
        <http://example.org/ns#label> "B" .
    )absolute_import)division)print_functionIsomorphicGraphto_isomorphic
isomorphicto_canonical_graph
graph_diffsimilar)GraphConjunctiveGraphReadOnlyGraphAggregate)BNodeNode)sha256)datetime)defaultdict)	text_typec             C   s.   | j d d d }|| j7 }|| jd 7 }|S )N   <   g    .A)dayssecondsmicroseconds)tdresult r   0/tmp/pip-build-7vycvbft/rdflib/rdflib/compare.py_total_secondsc   s    
r   c               @   s   e Zd Zdd Zdd ZdS )_runtimec             C   s
   || _ d S )N)label)selfr   r   r   r   __init__k   s    z_runtime.__init__c                s(   j d kr jd _  fdd}|S )Nr   c                 sH   t j } | |}d|krD|d d k	rD|d }tt j | |j< |S )Nstats)r   nowr   r   )argskwargsstartr   r"   )fr    r   r   	wrapped_fr   s    
z$_runtime.__call__.<locals>.wrapped_f)r   __name__)r    r'   r(   r   )r'   r    r   __call__n   s    
z_runtime.__call__N)r)   
__module____qualname__r!   r*   r   r   r   r   r   j   s   r   c               @   s   e Zd Zdd Zdd ZdS )_call_countc             C   s
   || _ d S )N)r   )r    r   r   r   r   r!   }   s    z_call_count.__init__c                s(   j d kr jd _  fdd}|S )Nr   c                 sL   d|krB|d d k	rB|d }j |kr0d|j < |j   d7  <  | |S )Nr"   r      )r   )r$   r%   r"   )r'   r    r   r   r(      s    

z'_call_count.__call__.<locals>.wrapped_f)r   r)   )r    r'   r(   r   )r'   r    r   r*      s    
z_call_count.__call__N)r)   r+   r,   r!   r*   r   r   r   r   r-   |   s   r-   c                   sP   e Zd ZdZ fddZdd Zdd Z fdd	ZdddZdddZ	  Z
S )r   a%  An implementation of the RGDA1 graph digest algorithm.

    An implementation of RGDA1 (publication below),
    a combination of Sayers & Karp's graph digest algorithm using
    sum and SHA-256 <http://www.hpl.hp.com/techreports/2003/HPL-2003-235R1.pdf>
    and traces <http://pallini.di.uniroma1.it>, an average case
    polynomial time algorithm for graph canonicalization.

    McCusker, J. P. (2015). WebSig: A Digital Signature Framework for the Web.
    Rensselaer Polytechnic Institute, Troy, NY.
    http://gradworks.umi.com/3727015.pdf
    c                s   t t| jf | d S )N)superr   r!   )r    r%   )	__class__r   r   r!      s    zIsomorphicGraph.__init__c             C   s2   t |tsdS t| t|kr"dS | j |j kS )zGraph isomorphism testing.F)
isinstancer   leninternal_hash)r    otherr   r   r   __eq__   s
    
zIsomorphicGraph.__eq__c             C   s   | j | S )z#Negative graph isomorphism testing.)r5   )r    r4   r   r   r   __ne__   s    zIsomorphicGraph.__ne__c                s   t t| j S )N)r/   r   __hash__)r    )r0   r   r   r7      s    zIsomorphicGraph.__hash__Nc             C   s   | j |dS )z*Synonym for IsomorphicGraph.internal_hash.)r"   )r3   )r    r"   r   r   r   graph_digest   s    zIsomorphicGraph.graph_digestc             C   s   t | j|dS )z
        This is defined instead of __hash__ to avoid a circular recursion
        scenario with the Memory store for rdflib which requires a hash lookup
        in order to return a generator of triples.
        )r"   )_TripleCanonicalizerto_hash)r    r"   r   r   r   r3      s    zIsomorphicGraph.internal_hash)N)N)r)   r+   r,   __doc__r!   r5   r6   r7   r8   r3   __classcell__r   r   )r0   r   r      s   
c               @   sL   e Zd Zf dfddZdd Zdd Zddd	Zd
d Zdd Zdd Z	dS )ColorNc             C   s.   |d kri }|| _ || _|| _|| _d | _d S )N)_hash_cachecolornodeshashfuncZ_hash_color)r    r@   rA   r?   
hash_cacher   r   r   r!      s    zColor.__init__c             C   s   | j  \}}d||f S )NzColor %s (%s nodes))key)r    r@   r?   r   r   r   __str__   s    zColor.__str__c             C   s   t | j| j fS )N)r2   r@   
hash_color)r    r   r   r   rC      s    z	Color.keyc                s   |d kr| j }|| jkr"| j| S dd  t|tr< |S d}x.|D ]&}|| jdj fdd|D 7 }qFW d| }|| j|< |S )Nc             S   s   t | tr| j S t| S d S )N)r1   r   n3r   )xr   r   r   	stringify   s    
z#Color.hash_color.<locals>.stringifyr    c                s   g | ]} |qS r   r   ).0rG   )rH   r   r   
<listcomp>   s    z$Color.hash_color.<locals>.<listcomp>z%x)r?   r>   r1   r   rA   join)r    r?   valuetriplevalr   )rH   r   rE      s    



&
zColor.hash_colorc       	         s   i }x| j D ]}t| j}xR j D ]H}| fdd|j|d |fD 7 }| fdd|j|d |fD 7 }q"W t|}| j|}||krtg | j|| jd}|||< || j j	| qW |j
 S )Nc                s    g | ]\}}}d | j  fqS )r.   )rE   )rJ   spo)Wr   r   rK      s   z%Color.distinguish.<locals>.<listcomp>c                s    g | ]\}}} j  |d fqS )   )rE   )rJ   rP   rQ   rR   )rS   r   r   rK      s   )rB   )r@   listr?   ZtriplestuplerE   r=   rA   r>   appendvalues)	r    rS   graphcolorsn	new_colornodeZnew_hash_colorcr   )rS   r   distinguish   s&    




zColor.distinguishc             C   s   t | jdkS )Nr.   )r2   r@   )r    r   r   r   discrete   s    zColor.discretec             C   s    t | jd d  | j| j| jdS )N)rB   )r=   r@   rA   r?   r>   )r    r   r   r   copy   s    z
Color.copy)N)
r)   r+   r,   r!   rD   rC   rE   r_   r`   ra   r   r   r   r   r=      s   	
r=   c               @   s   e Zd ZefddZdd Zdd Zdd Zd	d
 Zdd Z	e
ddddZdd ZdddZedddgfddZdddZdd ZdS ) r9   c                s"   || _  fdd}i | _|| _d S )Nc                s(     }|j t| jd t|j dS )Nutf8   )updater   encodeint	hexdigest)rP   h)rA   r   r   	_hashfunc  s    z0_TripleCanonicalizer.__init__.<locals>._hashfunc)rY   r>   rA   )r    rY   rA   ri   r   )rA   r   r!     s    z_TripleCanonicalizer.__init__c             C   s   t dd |D dkS )Nc             S   s   g | ]}|j  s|qS r   )r`   )rJ   r^   r   r   r   rK     s    z2_TripleCanonicalizer._discrete.<locals>.<listcomp>r   )r2   )r    coloringr   r   r   	_discrete  s    z_TripleCanonicalizer._discretec                s
  t  }t  }tt  _x jD ]\}}}t |||g}t dd |D }t|dkr||| O }||O }t|tr j| j| t|tr j| j| t|tr j| j|  j| j| qW t|dkrtt	| j
 jdg fdd|D  S g S dS )a\  Finds an initial color for the graph.

        Finds an initial color fo the graph by finding all blank nodes and
        non-blank nodes that are adjacent. Nodes that are not adjacent to blank
        nodes are not included, as they are a) already colored (by URI or literal)
        and b) do not factor into the color of any blank node.
        c             S   s   g | ]}t |tr|qS r   )r1   r   )rJ   rG   r   r   r   rK     s    z7_TripleCanonicalizer._initial_color.<locals>.<listcomp>r   )rB   c                s"   g | ]}t |g j| jd qS ))rB   )r=   rA   r>   )rJ   rG   )r    r   r   rK   *  s   N)setr   Z
_neighborsrY   r2   r1   r   addr=   rU   rA   r>   )r    ZbnodesZothersrP   rQ   rR   r@   br   )r    r   _initial_color  s*    





z#_TripleCanonicalizer._initial_colorc             C   sF   t |j}|jt|jf |jj| t|g| jt|| j	d}|S )N)rB   )
rU   r?   rW   r2   r@   remover=   rA   rV   r>   )r    r?   Z
individualr\   r^   r   r   r   _individuate0  s    

z!_TripleCanonicalizer._individuatec             c   sD   dd |D }x0dd |D D ]}x|j D ]}||fV  q*W qW d S )Nc             S   s   g | ]}|j  s|qS r   )r`   )rJ   r^   r   r   r   rK   :  s    z8_TripleCanonicalizer._get_candidates.<locals>.<listcomp>c             S   s   g | ]}|j  s|qS r   )r`   )rJ   r^   r   r   r   rK   ;  s    )r@   )r    rj   
candidatesr^   r]   r   r   r   _get_candidates9  s    z$_TripleCanonicalizer._get_candidatesc             C   sR  t |dd dd}|d d  }xt|dkr| j| r|j }x|d d  D ]}t|jdkspt|jd trNt |j|| jdd dd}|j	| |j
| y.|j|}|d | | ||d d   }W qN tk
r   |dd  | }Y qNX qNW q W g }t }xF|D ]>}	|	j }
|
|kr6||
 jj
|	j n|j|	 |	||
< qW |S )Nc             S   s   | j  S )N)rC   )rG   r   r   r   <lambda>@  s    z._TripleCanonicalizer._refine.<locals>.<lambda>T)rC   reverser   r.   c             S   s   | j  S )N)rC   )rG   r   r   r   rt   G  s    )sortedr2   rk   popr@   r1   r   r_   rY   rp   extendindex
ValueErrordictrE   rW   )r    rj   sequencerS   r^   rZ   siZcombined_colorsZcombined_color_mapr?   Z
color_hashr   r   r   _refine?  s2    


$


z_TripleCanonicalizer._refineZto_hash_runtimeNc             C   sP   d}x2| j |dD ]"}|| jdjdd |D 7 }qW |d k	rLd| |d< |S )Nr   )r"   rI   c             S   s   g | ]}|j  qS r   )rF   )rJ   rG   r   r   r   rK   `  s    z0_TripleCanonicalizer.to_hash.<locals>.<listcomp>z%xr8   )canonical_triplesrA   rL   )r    r"   r   rN   r   r   r   r:   \  s    "z_TripleCanonicalizer.to_hashc             C   sb   dd |D }xN| j |s\dd |D d }|jd }| j||}|j| | j||g}qW |S )Nc             S   s   g | ]}|j  qS r   )ra   )rJ   r^   r   r   r   rK   f  s    z;_TripleCanonicalizer._experimental_path.<locals>.<listcomp>c             S   s   g | ]}|j  s|qS r   )r`   )rJ   rG   r   r   r   rK   h  s    r   )rk   r@   rq   rW   r~   )r    rj   r?   r]   r\   r   r   r   _experimental_pathe  s    

z'_TripleCanonicalizer._experimental_pathc             C   sd   |st t}xRt| D ]F}tdd |D }x|D ]}||| O }q2W x|D ]}|||< qLW qW |S )Nc             S   s   g | ]}|j d  qS )r   )r@   )rJ   r^   r   r   r   rK   s  s    z:_TripleCanonicalizer._create_generator.<locals>.<listcomp>)r   rl   zip)r    Z	coloringsZ	groupingsgroupgr[   r   r   r   _create_generatoro  s    

z&_TripleCanonicalizer._create_generatorindividuationsr   c                sV  |d k	rd|krd|d< |d  d7  <  j |}g }d }d }d }d }	tt}
t }xd|D ]Z\}}||
kr|
| |@ }t|dkr|j| q\|j| g }d }x*|D ]"}|j }|j| ||kr|}qW  j||}|j|  j||g}t	dd |D } j
|}tdd |D }|	r8 j|	|g|
}
|}	|d ksP||k rd|g}|}|}|}q\||kr|d k	r|d  d7  < q\||kr|j| q\|d k	r\|d  d7  < q\W  fdd|D }t|dkrNd }d }x`|D ]X}|d g} j|||d}t	d	d |D }|d ks.||kr|g}|}|d }qW ||d< |d S )
NZpruningsr   r.   c             S   s   g | ]}|j  qS r   )rC   )rJ   r^   r   r   r   rK     s    z0_TripleCanonicalizer._traces.<locals>.<listcomp>c             S   s   g | ]}|j  qS r   )rC   )rJ   r^   r   r   r   rK     s    c                s   g | ]} j |r|qS r   )rk   )rJ   rG   )r    r   r   rK     s    )r"   depthc             S   s   g | ]}|j  qS r   )rC   )rJ   r^   r   r   r   rK     s    )rs   r   rl   r2   rm   ra   rW   rq   r~   rV   r   r   _traces)r    rj   r"   r   rr   bestZ
best_scoreZbest_experimentalZbest_experimental_scoreZlast_coloring	generatorvisited	candidater?   vZcoloring_copyZ
color_copyr^   Zc_copyr\   Zrefined_coloringZcolor_scoreZexperimentalZexperimental_scorer`   Z
best_depthdr   )r    r   r   z  s|    











z_TripleCanonicalizer._tracesc       	      c   sN  |d k	rt j }|d k	r t j }| j }|d k	rTt| j|d< tdt|d |d< | j||d d  }|d k	rtt j | |d< t||d< | j|sdg}| j	|||d}|d k	r|d |d< n|d k	rd|d	< d|d< |d k	rt||d
< t
dd |D }|d k	r"tt j | |d< x&| jD ]}t| j||}|V  q*W d S )NZtriple_countr   r.   Zadjacent_nodesZinitial_coloring_runtimeZinitial_color_count)r"   r   Z
tree_depthr   Zcolor_countc             S   s   g | ]}|j d  |j fqS )r   )r@   rE   )rJ   r^   r   r   r   rK     s    z:_TripleCanonicalizer.canonical_triples.<locals>.<listcomp>Zcanonicalize_triples_runtime)r   r#   ro   r2   rY   maxr~   r   rk   r   r{   rV   _canonicalize_bnodes)	r    r"   Zstart_canonicalizationZstart_coloringrj   r   Zbnode_labelsrN   r   r   r   r   r     s8    

z&_TripleCanonicalizer.canonical_triplesc             c   s8   x2|D ]*}t |tr*td||  dV  q|V  qW d S )Nzcb%s)rM   )r1   r   )r    rN   labelsZtermr   r   r   r     s    

z)_TripleCanonicalizer._canonicalize_bnodes)N)N)N)r)   r+   r,   r   r!   rk   ro   rq   rs   r~   r   r:   r   r   r-   r   r   r   r   r   r   r   r9      s   
"	

C
 r9   c             C   s6   t | tr| S t }t| dr*t| jd}|| 7 }|S )N
identifier)r   )r1   r   hasattrr   )rY   r   r   r   r   r     s    

c             C   s    t | j }t |j }||kS )a  Compare graph for equality.

    Uses an algorithm to compute unique hashes which takes bnodes into account.

    Examples::

        >>> g1 = Graph().parse(format='n3', data='''
        ...     @prefix : <http://example.org/ns#> .
        ...     <http://example.org> :rel <http://example.org/a> .
        ...     <http://example.org> :rel <http://example.org/b> .
        ...     <http://example.org> :rel [ :label "A bnode." ] .
        ... ''')
        >>> g2 = Graph().parse(format='n3', data='''
        ...     @prefix ns: <http://example.org/ns#> .
        ...     <http://example.org> ns:rel [ ns:label "A bnode." ] .
        ...     <http://example.org> ns:rel <http://example.org/b>,
        ...             <http://example.org/a> .
        ... ''')
        >>> isomorphic(g1, g2)
        True

        >>> g3 = Graph().parse(format='n3', data='''
        ...     @prefix : <http://example.org/ns#> .
        ...     <http://example.org> :rel <http://example.org/a> .
        ...     <http://example.org> :rel <http://example.org/b> .
        ...     <http://example.org> :rel <http://example.org/c> .
        ... ''')
        >>> isomorphic(g1, g3)
        False
    )r9   r:   )Zgraph1Zgraph2Zgd1Zgd2r   r   r   r     s    Nc             C   s$   t  }|t| j|d7 }t|gS )zCreates a canonical, read-only graph.

    Creates a canonical, read-only graph where all bnode id:s are based on
    deterministical SHA-256 checksums, correlated with the graph contents.
    )r"   )r   r9   r   r   )g1r"   rY   r   r   r   r     s    c             C   s2   t | }t |}|| }|| }|| }|||fS )zEReturns three sets of triples: "in both", "in first" and "in second".)r   )r   g2Zcg1Zcg2Zin_bothZin_firstZ	in_secondr   r   r   r	     s    c             C   s   t dd t| |D S )a@  Checks if the two graphs are "similar".

    Checks if the two graphs are "similar", by comparing sorted triples where
    all bnodes have been replaced by a singular mock bnode (the
    ``_MOCK_BNODE``).

    This is a much cheaper, but less reliable, alternative to the comparison
    algorithm in ``isomorphic``.
    c             s   s   | ]\}}||kV  qd S )Nr   )rJ   t1t2r   r   r   	<genexpr>7  s    zsimilar.<locals>.<genexpr>)all_squashed_graphs_triples)r   r   r   r   r   r
   -  s    
c             c   s6   x0t tt| tt|D ]\}}||fV  qW d S )N)r   rv   _squash_graph)r   r   r   r   r   r   r   r   :  s    $r   c             C   s   dd | D S )Nc             s   s   | ]}t |V  qd S )N)_squash_bnodes)rJ   rN   r   r   r   r   @  s    z _squash_graph.<locals>.<genexpr>r   )rY   r   r   r   r   ?  s    r   c             C   s   t dd | D S )Nc             s   s    | ]}t |trtp|V  qd S )N)r1   r   _MOCK_BNODE)rJ   tr   r   r   r   D  s    z!_squash_bnodes.<locals>.<genexpr>)rV   )rN   r   r   r   r   C  s    r   )N)$r;   
__future__r   r   r   __all__Zrdflib.graphr   r   r   Zrdflib.termr   r   hashlibr   r   collectionsr   sixr   r   objectr   r-   r   r=   r9   r   r   r   r	   r   r
   r   r   r   r   r   r   r   <module>K   s6   
-D h
$
