3
d:              
   @   s  d Z ddlmZ ddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlZddlmZ ddlmZ ddlmZ ddlmZmZ ddlmZ d	gZd
Zi Zdd Zdd Zdd ZejfddZd@ddZG dd de Z!G dd de!Z"G dd de!Z#G dd de#Z$G dd de#Z%G dd  d e#Z&G d!d" d"e!Z'G d#d$ d$e#Z(G d%d& d&e!Z)e! Z*d'd( Z+dAd)d*Z,d+d, Z-dBd-d.Z.d/d0 Z/dCd1d2Z0dDd3d4Z1d5d6 Z2dEd7d8Z3e+e,e-e.e0e2e3e/e1d9	Z4d:d; Z5G d<d	 d	e Z6d=d> Z7e8d?kre7  dS )Fz[
A commandline tool for semi-automatically converting CSV to RDF

try: ``csv2rdf --help``

    )print_functionN)configparser)quote)	text_type)RDFRDFS)	split_uriCSV2RDFa^  
csv2rdf.py     -b <instance-base>     -p <property-base>     [-D <default>]     [-c <classname>]     [-i <identity column(s)>]     [-l <label columns>]     [-s <N>] [-o <output>]     [-f configfile]     [--col<N> <colspec>]     [--prop<N> <property>]     <[-d <delim>]     [-C] [files...]"

Reads csv files from stdin or given files
if -d is given, use this delimiter
if -s is given, skips N lines at the start
Creates a URI from the columns given to -i, or automatically by numbering if
none is given
Outputs RDFS labels from the columns given to -l
if -c is given adds a type triple with the given classname
if -C is given, the class is defined as rdfs:Class
Outputs one RDF triple per column in each row.
Output is in n3 format.
Output is stdout, unless -o is specified

Long options also supported:     --base,     --propbase,     --ident,     --class,     --label,     --out,     --defineclass

Long options --col0, --col1, ...
can be used to specify conversion for columns.
Conversions can be:
    ignore, float(), int(), split(sep, [more]), uri(base, [class]), date(format)

Long options --prop0, --prop1, ...
can be used to use specific properties, rather than ones auto-generated
from the headers

-D sets the default conversion for columns not listed

-f says to read config from a .ini/config file - the file must contain one
section called csv2rdf, with keys like the long options, i.e.:

[csv2rdf]
out=output.n3
base=http://example.org/
col0=split(";")
col1=split(";", uri("http://example.org/things/",
                    "http://xmlns.com/foaf/0.1/Person"))
col2=float()
col3=int()
col4=date("%Y-%b-%d %H:%M:%S")

c             C   sP   t jdd| } t jdd| } | jd} dj| d j gdd | d	d
 D  S )zb
    CamelCase + lowercase inital a string


    FIRST_NM => firstNm

    firstNm => firstNm

    z[^\w] z([a-z])([A-Z])z\1 \2 r   c             S   s   g | ]}|j  qS  )
capitalize).0xr   r   6/tmp/pip-build-7vycvbft/rdflib/rdflib/tools/csv2rdf.py
<listcomp>o   s    ztoProperty.<locals>.<listcomp>   N)resubsplitjoinlower)labelr   r   r   
toPropertyb   s    

r   c             C   s0   | dd j  s,| dd j | dd   S | S )Nr      r   )isupperr   )r   r   r   r   toPropertyLabelr   s    r   c                s   t  fdd|D S )zPreturn a set of indexes from a list
    >>> index([1,2,3],(0,2))
    (1, 3)
    c                s   g | ]} | qS r   r   )r   r   )lr   r   r   }   s    zindex.<locals>.<listcomp>)tuple)r   ir   )r   r   indexx   s    r    c             k   s8   t j| fd|i|}x|D ]}dd |D V  qW d S )Ndialectc             S   s   g | ]}t |d ddqS )zutf-8replace)errors)r   )r   cellr   r   r   r      s    zcsv_reader.<locals>.<listcomp>)csvreader)Zcsv_datar!   kwargs
csv_readerrowr   r   r   r(      s    

r(   c             C   sD   |r*t j|t| jdjdddd }n
t j| }||ft| < |S )Nutf8r
   _r   )safe)rdflibURIRefr   encoder"   uris)r   prefixclass_rr   r   r   	prefixuri   s    
r4   c               @   s   e Zd Zdd Zdd ZdS )	NodeMakerc             C   s   t jjS )N)r-   r   Literal)selfr   r   r   range   s    zNodeMaker.rangec             C   s
   t j|S )N)r-   r6   )r7   r   r   r   r   __call__   s    zNodeMaker.__call__N)__name__
__module____qualname__r8   r9   r   r   r   r   r5      s   r5   c               @   s$   e Zd Zdd Zdd Zdd ZdS )NodeUric             C   s"   || _ |rtj|| _nd | _d S )N)r1   r-   r.   r2   )r7   r1   r2   r   r   r   __init__   s    zNodeUri.__init__c             C   s   t || j| jS )N)r4   r1   r2   )r7   r   r   r   r   r9      s    zNodeUri.__call__c             C   s   | j ptjjS )N)r2   r-   r   Resource)r7   r   r   r   r8      s    zNodeUri.rangeN)r:   r;   r<   r>   r9   r8   r   r   r   r   r=      s   r=   c               @   s   e Zd ZdddZdS )NodeLiteralNc             C   s
   || _ d S )N)f)r7   rA   r   r   r   r>      s    zNodeLiteral.__init__)N)r:   r;   r<   r>   r   r   r   r   r@      s   r@   c               @   s   e Zd Zdd Zdd ZdS )	NodeFloatc             C   s>   | j stjt|S t| j r2tjt| j |S tdd S )Nz(Function passed to float is not callable)rA   r-   r6   floatcallable	Exception)r7   r   r   r   r   r9      s
    
zNodeFloat.__call__c             C   s   t jjS )N)r-   XSDdouble)r7   r   r   r   r8      s    zNodeFloat.rangeN)r:   r;   r<   r9   r8   r   r   r   r   rB      s   rB   c               @   s   e Zd Zdd Zdd ZdS )NodeIntc             C   s>   | j stjt|S t| j r2tjt| j |S tdd S )Nz&Function passed to int is not callable)rA   r-   r6   intrD   rE   )r7   r   r   r   r   r9      s
    
zNodeInt.__call__c             C   s   t jjS )N)r-   rF   rI   )r7   r   r   r   r8      s    zNodeInt.rangeN)r:   r;   r<   r9   r8   r   r   r   r   rH      s   rH   c               @   s   e Zd Zdd Zdd ZdS )NodeBoolc             C   s>   | j stjt|S t| j r2tjt| j |S tdd S )Nz'Function passed to bool is not callable)rA   r-   r6   boolrD   rE   )r7   r   r   r   r   r9      s
    
zNodeBool.__call__c             C   s   t jjS )N)r-   rF   rK   )r7   r   r   r   r8      s    zNodeBool.rangeN)r:   r;   r<   r9   r8   r   r   r   r   rJ      s   rJ   c               @   s   e Zd Zdd Zdd ZdS )NodeReplacec             C   s   || _ || _d S )N)ab)r7   rM   rN   r   r   r   r>      s    zNodeReplace.__init__c             C   s   |j | j| jS )N)r"   rM   rN   )r7   r   r   r   r   r9      s    zNodeReplace.__call__N)r:   r;   r<   r>   r9   r   r   r   r   rL      s   rL   c               @   s   e Zd Zdd Zdd ZdS )NodeDatec             C   s   t jtjj|| jS )N)r-   r6   datetimestrptimerA   )r7   r   r   r   r   r9      s    zNodeDate.__call__c             C   s   t jjS )N)r-   rF   ZdateTime)r7   r   r   r   r8      s    zNodeDate.rangeN)r:   r;   r<   r9   r8   r   r   r   r   rO      s   rO   c               @   s$   e Zd Zdd Zdd Zdd ZdS )	NodeSplitc             C   s   || _ || _d S )N)seprA   )r7   rS   rA   r   r   r   r>      s    zNodeSplit.__init__c                s:    j stj _ t j s td fdd|j jD S )Nz)Function passed to split is not callable!c                s&   g | ]}|j  d kr j|j  qS )r   )striprA   )r   y)r7   r   r   r      s    z&NodeSplit.__call__.<locals>.<listcomp>)rA   r-   r6   rD   rE   r   rS   )r7   r   r   )r7   r   r9      s
    
zNodeSplit.__call__c             C   s&   | j rt| j tr| j j S tj| S )N)rA   
isinstancer5   r8   )r7   r   r   r   r8      s    
zNodeSplit.rangeN)r:   r;   r<   r>   r9   r8   r   r   r   r   rR      s   rR   c              O   s   dS )Nignorer   )argsr'   r   r   r   _config_ignore   s    rY   c             C   s
   t | |S )N)r=   )r1   r2   r   r   r   _config_uri  s    rZ   c               C   s   t  S )N)r@   r   r   r   r   _config_literal  s    r[   c             C   s   t | S )N)rB   )rA   r   r   r   _config_float	  s    r\   c             C   s
   t | |S )N)rL   )rM   rN   r   r   r   _config_replace  s    r]   c             C   s   t | S )N)rH   )rA   r   r   r   _config_int  s    r^   c             C   s   t | S )N)rJ   )rA   r   r   r   _config_bool  s    r_   c             C   s   t | S )N)rO   )Zformat_r   r   r   _config_date  s    r`   c             C   s
   t | |S )N)rR   )rS   rA   r   r   r   _config_split  s    ra   )	rW   uriliteralrC   rI   dater   r"   rK   c             C   s
   t | tS )z$Return a function for column mapping)evalconfig_functions)vr   r   r   column-  s    rh   c               @   s$   e Zd Zdd Zdd Zdd ZdS )r	   c             C   sb   d | _ d | _d | _d| _d | _d| _d| _d| _d | _i | _	i | _
tjdtjdd| _d| _d S )NautoFr   ,zutf-8r"   )r#   )CLASSBASEPROPBASEIDENTLABELDEFINECLASSSKIPDELIMDEFAULTCOLUMNSPROPScodecs	getwritersysstdoutOUTtriples)r7   r   r   r   r>   4  s    zCSV2RDF.__init__c             C   s4   | j jd|j |j |j f  |  jd7  _d S )Nz%s %s %s .
r   )rz   writeZn3r{   )r7   spor   r   r   tripleG  s    "zCSV2RDF.triplec                sb  t j  } jr"tjjd jj   jdkrDt jt rD jf _ j	s`t
jd tjd _	 js|t
jd tjd _xt jD ]}t| qW t|j }tt fdd|D }x. jj D ] \}}|||< t|d	 ||< qW  jr j jtjtj xtt|D ]}|| ||  }	}
|	d
ks|
d
krFq j j!| j"dkr`q j|	tjtj#  j|	tj$tj%t&|
  j|	tj' j  j|	tj j j!|t(j  qW d}x|D ]}
y jdkr j	d|  }n" j	dj)dd t*|
 jD  } j+rB j|tj$tj%dj)t*|
 j+  jr\ j|tj j xt|
D ]\}}|j, }|d
krf j j!| j"dkrqfyZ j j!|tj%|}t|trx4|D ]} j||| | qW n j||| | W nD t-k
r8 } z&t
jdd||| ||j.f   W Y d d }~X nX qfW |d	7 }|d dkrvtjjd| j/t j  | f  W n   tjjd|   Y nX qW t0 }x^t1j D ]R\}
}|\}} j|tj$tj%|
 |rtj2|}|j3|  j|tj| qW x |D ]} j|tjtj q
W  jj4  tjjd| j/f  tjjdt j  |   d S )NzOutput to %s
ri   z2No base given, using http://example.org/instances/zhttp://example.org/instances/z:No property base given, using http://example.org/property/zhttp://example.org/props/c                s   g | ]} j t| qS r   )rm   r   )r   r   )r7   r   r   r   e  s    z#CSV2RDF.convert.<locals>.<listcomp>r   r   rW   r   z%dr+   c             S   s&   g | ]}t |jd jddddqS )r*   r
   r+   r   )r,   )r   r/   r"   )r   r   r   r   r   r     s   r
   z#Could not process value for column z%d:%s in row %d, ignoring: %s i z$%d rows, %d triples, elapsed %.2fs.
zError processing line: %d
z#Converted %d rows into %d triples.
zTook %.2f seconds.
)5timerz   rx   stderrr|   namern   rV   r   rl   warningswarnr-   	Namespacerm   r8   rq   nextlistdict	enumerateru   itemsr   rp   r   rk   r   typer   ZClasslenrt   getrs   ZPropertyr   r6   r   domaindefault_node_maker   r    ro   rT   rE   messager{   setr0   r.   addclose)r7   Z	csvreaderstartr   Zheader_labelsheaderskrg   r   hr   Zrowsrb   r   Z_oeclassesucr   )r7   r   convertK  s    




0



zCSV2RDF.convertN)r:   r;   r<   r>   r   r   r   r   r   r   r	   3  s   c              C   s2  t  } tjtjdd  dddddddd	d
ddg
\}}t|}d|ksNd|kr`tt tjd: d|krtj	 }|j
t|d  xB|jdD ]2\}}|dkrtj|dd| _q|dkrtj|| _q|dkrtj|| _q|dkrtj|| _q|dkrt|| _q|dkr$t|| _q|dkr:t|| _q|dkrL|| _q|dkrbt|| _q|dkrxt|| _q|jdrt|| j t|dd  < q|jdrtj|| j!t|dd  < qW d |krtj|d  dd| _d!|krtj|d! dd| _d"|kr tj|d" | _d#|kr:tj|d# | _d$|krN|d$ | _d%|krb|d% | _d&|krzt|d& | _d'|krt|d' | _d(|krtj|d( | _d)|krtj|d) | _d*|krt|d* | _d+|krt|d+ | _d,|krt|d, | _d-|kr&t|d- | _d.|kr>t|d. | _d/|krVt|d/ | _d0|krptj|d0 | _d1|krtj|d1 | _xf|j D ]Z\}}|jd2rt|| j t|d3d  < n(|jd4rtj|| j!t|d5d  < qW | jrd6|ksd7|krd8| _| j"t#t$j%|| jd9 d S );Nr   zhc:b:p:i:o:Cf:l:s:d:D:zout=zbase=zdelim=z	propbase=zclass=zdefault=ident=zlabel=zskip=Zdefineclasshelpz-hz--helpz-fcsv2rdfoutwzutf-8baseZpropbaseclassidentr   delimskipdefaultcol   prop   z-oz--outz-bz--basez-dz--delimz-Dz	--defaultz-pz
--propbasez-lz--labelz-iz--identz-sz--skipz-cz--classz--col   z--prop   z-Cz--defineclassT)	delimiter)&r	   getoptrx   argvr   printHELPexitr   ConfigParserreadfpopenr   rv   rz   r-   r   rl   rm   r.   rk   rK   rp   re   rn   ro   rr   rI   rq   rh   rs   
startswithrt   ru   r   r(   	fileinputinput)r   optsfilesconfigr   rg   r   r   r   main  s    








 



















"r   __main__)N)NN)N)N)N)NN)9__doc__
__future__r   rx   r   r%   r   r   rv   r   rP   r   r-   Z	six.movesr   Zsix.moves.urllib.parser   sixr   r   r   Zrdflib.namespacer   __all__r   r0   r   r   r    excelr(   r4   objectr5   r=   r@   rB   rH   rJ   rL   rO   rR   r   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rf   rh   r	   r   r:   r   r   r   r   <module>   st   >	
	




 	e
