3
Pd5g                 @   s  d dl Z d dlZd dlmZ d dlZd dlZd dlZddddgZe j	dZ
e j	dZe j	d	Ze j	d
Ze j	dZe j	dZe j	dZe j	dZe j	dZe j	dZe j	dZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZd d! Zd"d# Z d$d% Z!d?d&d'Z"d(d) Z#d*d+ Z$d,d- Z%d.d/ Z&d0d1 Z'G d2d deZ(d3d Z)d4d5 Z*d6d7 Z+d8d9 Z,d:d; Z-d<e-_.e/d=krd dl0Z0e0j1d> Z2e-e2 dS )@    N)OrderedDictMetaDataloadarff	ArffErrorParseArffErrorz^\s*@z^%z^\s+$z^\s*@\S*z^@[Dd][Aa][Tt][Aa]z*^@[Rr][Ee][Ll][Aa][Tt][Ii][Oo][Nn]\s*(\S*)z2^\s*@[Aa][Tt][Tt][Rr][Ii][Bb][Uu][Tt][Ee]\s*(..*$)z{(.+)}z%[Dd][Aa][Tt][Ee]\s+[\"']?(.+?)[\"']?$z'(..+)'\s+(..+$)z(\S+)\s+(..+$)c               @   s   e Zd ZdS )r   N)__name__
__module____qualname__ r
   r
   J/var/www/html/virt/lib64/python3.6/site-packages/scipy/io/arff/arffread.pyr   7   s   c               @   s   e Zd ZdS )r   N)r   r   r	   r
   r
   r
   r   r   ;   s   c               @   s4   e Zd ZdZdd Zedd Zdd Zdd	 ZdS )
	AttributeNc             C   s   || _ d | _tj| _d S )N)namerangenpobject_dtype)selfr   r
   r
   r   __init__F   s    zAttribute.__init__c             C   s   dS )zj
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.
        Nr
   )clsr   attr_stringr
   r
   r   parse_attributeK   s    zAttribute.parse_attributec             C   s   dS )z-
        Parse a value of this type.
        Nr
   )r   data_strr
   r
   r   
parse_dataS   s    zAttribute.parse_datac             C   s   | j d | j S )z-
        Parse a value of this type.
        ,)r   	type_name)r   r
   r
   r   __str__Y   s    zAttribute.__str__)	r   r   r	   r   r   classmethodr   r   r   r
   r
   r
   r   r   B   s
   r   c                   sH   e Zd ZdZ fddZedd Zedd Zdd	 Z	d
d Z
  ZS )NominalAttributeZnominalc                s6   t  j| || _|| _tjtdd |D f| _d S )Nc             s   s   | ]}t |V  qd S )N)len).0ir
   r
   r   	<genexpr>h   s    z,NominalAttribute.__init__.<locals>.<genexpr>)superr   valuesr   r   Zstring_maxr   )r   r   r#   )	__class__r
   r   r   d   s    zNominalAttribute.__init__c             C   s4   t j| }|r(t|jd\}}t|S tddS )a  Given a string containing a nominal type, returns a tuple of the
        possible values.

        A nominal type is defined as something framed between braces ({}).

        Parameters
        ----------
        atrv : str
           Nominal type definition

        Returns
        -------
        poss_vals : tuple
           possible values

        Examples
        --------
        >>> get_nom_val("{floup, bouga, fl, ratata}")
        ('floup', 'bouga', 'fl', 'ratata')
           z(This does not look like a nominal stringN)	r_nominalmatchsplit_data_linegrouptuple
ValueError)atrvmattrs_r
   r
   r   _get_nom_valj   s
    
zNominalAttribute._get_nom_valc             C   s(   |d dkr | j |}| ||S dS dS )z
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For nominal attributes, the attribute string would be like '{<attr_1>,
         <attr2>, <attr_3>}'.
        r   {N)r1   )r   r   r   r#   r
   r
   r   r      s    	

z NominalAttribute.parse_attributec             C   s8   || j kr|S |dkr|S tdt|t| j f dS )z-
        Parse a value of this type.
        ?z%s value not in %sN)r#   r,   str)r   r   r
   r
   r   r      s    

zNominalAttribute.parse_datac             C   sR   | j d }x,tt| jd D ]}|| j| d 7 }qW || jd 7 }|d7 }|S )Nz,{r&   r   })r   r   r   r#   )r   msgr    r
   r
   r   r      s    
zNominalAttribute.__str__)r   r   r	   r   r   staticmethodr1   r   r   r   r   __classcell__r
   r
   )r%   r   r   `   s   r   c                   s8   e Zd Z fddZedd Zdd Zdd Z  ZS )	NumericAttributec                s   t  j| d| _tj| _d S )Nnumeric)r"   r   r   r   Zfloat_r   )r   r   )r%   r
   r   r      s    zNumericAttribute.__init__c             C   sX   |j  j }|dtd dksH|dtd dksH|dtd dkrP| |S dS dS )z
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For numeric attributes, the attribute string would be like
        'numeric' or 'int' or 'real'.
        Nr;   intreal)lowerstripr   )r   r   r   r
   r
   r   r      s    
z NumericAttribute.parse_attributec             C   s   d|krt jS t|S dS )a  
        Parse a value of this type.

        Parameters
        ----------
        data_str : str
           string to convert

        Returns
        -------
        f : float
           where float can be nan

        Examples
        --------
        >>> atr = NumericAttribute('atr')
        >>> atr.parse_data('1')
        1.0
        >>> atr.parse_data('1\n')
        1.0
        >>> atr.parse_data('?\n')
        nan
        r3   N)r   nanfloat)r   r   r
   r
   r   r      s    zNumericAttribute.parse_datac             C   s<   |j d |j d  }tj|tj|tj|tj|| fS )Ng      ?r&   )sizer   nanminnanmaxmeanstd)r   datanbfacr
   r
   r   _basic_stats   s    zNumericAttribute._basic_stats)	r   r   r	   r   r   r   r   rI   r9   r
   r
   )r%   r   r:      s   r:   c                   s(   e Zd Z fddZedd Z  ZS )StringAttributec                s   t  j| d| _d S )Nstring)r"   r   r   )r   r   )r%   r
   r   r      s    zStringAttribute.__init__c             C   s0   |j  j }|dtd dkr(| |S dS dS )z
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For string attributes, the attribute string would be like
        'string'.
        NrK   )r>   r?   r   )r   r   r   r
   r
   r   r      s    
zStringAttribute.parse_attribute)r   r   r	   r   r   r   r9   r
   r
   )r%   r   rJ      s   rJ   c                   sH   e Zd Z fddZedd Zedd Zdd Z fd	d
Z	  Z
S )DateAttributec                s8   t  j| || _|| _d| _|| _tjd| j| _d S )Ndater   )	r"   r   date_formatdatetime_unitr   r   r   
datetime64r   )r   r   rN   rO   )r%   r
   r   r     s    zDateAttribute.__init__c             C   s   t j| }|r|jdj }d }d|kr:|jdd}d}n|jdd}d}d|krb|jdd}d	}d
|krz|jd
d}d}d|kr|jdd}d}d|kr|jdd}d}d|kr|jdd}d}d|ksd|krtd|d krtd||fS tdd S )Nr&   Zyyyyz%YYyyz%yZMMz%mMddz%dDZHHz%Hhmmz%Mr.   ssz%SszZz6Date type attributes with time zone not supported, yetz"Invalid or unsupported date formatzInvalid or no date format)r_dater(   r*   r?   replacer,   )r-   r.   patternrO   r
   r
   r   _get_date_format
  s<    
zDateAttribute._get_date_formatc             C   sB   |j  j }|dtd dkr:| j|\}}| |||S dS dS )z
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For date attributes, the attribute string would be like
        'date <format>'.
        NrM   )r>   r?   r   r_   )r   r   r   attr_string_lowerrN   rO   r
   r
   r   r   1  s
    
zDateAttribute.parse_attributec             C   sT   |j  j dj d}|dkr*tjd| jS tjj|| j}tj|jd| j S dS )z-
        Parse a value of this type.
        '"r3   ZNaTzdatetime64[%s]N)r?   r   rP   rO   datetimestrptimerN   Zastype)r   r   Zdate_strdtr
   r
   r   r   C  s    
zDateAttribute.parse_datac                s   t t| j d | j S )Nr   )r"   rL   r   rN   )r   )r%   r
   r   r   O  s    zDateAttribute.__str__)r   r   r	   r   r8   r_   r   r   r   r   r9   r
   r
   )r%   r   rL      s
   'rL   c                   s<   e Zd Z fddZedd Zdd Z fddZ  ZS )	RelationalAttributec                s*   t  j| d| _tj| _g | _d | _d S )N
relational)r"   r   r   r   r   r   
attributesdialect)r   r   )r%   r
   r   r   U  s
    zRelationalAttribute.__init__c             C   s0   |j  j }|dtd dkr(| |S dS dS )z
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For date attributes, the attribute string would be like
        'date <format>'.
        Nrg   )r>   r?   r   )r   r   r   r`   r
   r
   r   r   \  s    
z#RelationalAttribute.parse_attributec                s   t ttj}|j jd}g }xB|jdD ]4}t|j\ _|j	t
 fdd|D  q0W tj|dd jD S )Nzunicode-escape
c                s    g | ]}j | j | qS r
   )rh   r   )r   r    )rowr   r
   r   
<listcomp>y  s    z2RelationalAttribute.parse_data.<locals>.<listcomp>c             S   s   g | ]}|j |jfqS r
   )r   r   )r   ar
   r
   r   rl   |  s    )listr   r   rh   encodedecodesplitr)   ri   appendr+   r   array)r   r   elemsZescaped_stringZ
row_tuplesrawr
   )rk   r   r   r   m  s    zRelationalAttribute.parse_datac                s(   t t| j d djdd | jD  S )Nz
	c             s   s   | ]}t |V  qd S )N)r4   )r   rm   r
   r
   r   r!     s    z.RelationalAttribute.__str__.<locals>.<genexpr>)r"   rf   r   joinrh   )r   )r%   r
   r   r   ~  s    zRelationalAttribute.__str__)	r   r   r	   r   r   r   r   r   r9   r
   r
   )r%   r   rf   S  s   rf   c             C   sD   t ttttf}x$|D ]}|j| |}|d k	r|S qW td| d S )Nzunknown attribute %s)r   r:   rL   rJ   rf   r   r   )r   r   Zattr_classesr   attrr
   r
   r   to_attribute  s    
rx   c              C   s8   t tdd} | dkr4tj jd}|jdkt_tj} | S )zL
    Checks if the bug https://bugs.python.org/issue30157 is unpatched.
    has_bugNz3, 'a'ra   )getattrcsv_sniffer_has_bug_last_fieldcsvSniffersniff	quotecharry   )ry   ri   r
   r
   r   r{     s    r{   c             C   s   t  rd}x8dd|dfD ](}tj|tjtjB }|j| }|rP qW ||krPdS |j}t|dksft|d }|d d }	||	 }
|d	 d }	||	 }|d
 d }	t	||	 }tjdtj
||
d tj}t	|j| }|
|_||kr||_||_||_dS )zT
    Workaround for the bug https://bugs.python.org/issue30157 if is unpatched.
    zG(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)zI(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)zG(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)z-(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)Nr&   r   quotedelimspacez]((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$))r   r   )r{   recompileDOTALL	MULTILINEfindall
groupindexr   AssertionErrorboolescapesearchr   	delimiterdoublequoteskipinitialspace)
sniff_lineri   
delimitersZright_regexrestrregexpmatchesr   r.   nr   r   r   	dq_regexpr   r
   r
   r   %workaround_csv_sniffer_bug_last_field  s<    

r   c                s   d}t jttjd
jd   d dkr6 d d   }t fdd|D sX|d7 }|d krt j j||d}t	|||d	 t
t j g|}||fS )Nz,	r&      rj   c             3   s   | ]}| kV  qd S )Nr
   )r   d)liner
   r   r!     s    z"split_data_line.<locals>.<genexpr>r   )r   )r   ri   r   r6   r6   r6   )r|   field_size_limitr<   ctypesc_ulongvalueanyr}   r~   r   nextreader)r   ri   r   r   rk   r
   )r   r   r)     s    r)   c             C   s   |j  }tj|}|rj|jd}tj|r@t|\}}t| }qvtj|r`t|\}}t| }qvt	dnt	d| t
||}|j dkrt| ||}||fS )ak  Parse a raw string in header (e.g., starts by @attribute).

    Given a raw string attribute, try to get the name and type of the
    attribute. Constraints:

    * The first line must start with @attribute (case insensitive, and
      space like characters before @attribute are allowed)
    * Works also if the attribute is spread on multilines.
    * Works if empty lines or comments are in between

    Parameters
    ----------
    attribute : str
       the attribute string.

    Returns
    -------
    name : str
       name of the attribute
    value : str
       value of the attribute
    next : str
       next line to be parsed

    Examples
    --------
    If attribute is a string defined in python as r"floupi real", will
    return floupi as name, and real as value.

    >>> iterable = iter([0] * 10) # dummy iterator
    >>> tokenize_attribute(iterable, r"@attribute floupi real")
    ('floupi', 'real', 0)

    If attribute is r"'floupi 2' real", will return 'floupi 2' as name,
    and real as value.

    >>> tokenize_attribute(iterable, r"  @attribute 'floupi 2' real   ")
    ('floupi 2', 'real', 0)

    r&   zmulti line not supported yetzFirst line unparsable: %srg   )r?   r_attributer(   r*   r_comattrvaltokenize_single_commar   r_wcomattrvaltokenize_single_wcommar,   rx   r>   read_relational_attribute)iterable	attributeZsattrZmattrr-   r   typeZ	next_itemr
   r
   r   tokenize_attribute  s     )







r   c             C   sb   t j| }|rNy |jdj }|jdj }W qZ tk
rJ   tdY qZX ntd|  ||fS )Nr&   r   z Error while tokenizing attributez Error while tokenizing single %s)r   r(   r*   r?   
IndexErrorr,   )valr.   r   r   r
   r
   r   r   8  s    
r   c             C   sb   t j| }|rNy |jdj }|jdj }W qZ tk
rJ   tdY qZX ntd|  ||fS )Nr&   r   z Error while tokenizing attributez Error while tokenizing single %s)r   r(   r*   r?   r   r,   )r   r.   r   r   r
   r
   r   r   G  s    
r   c             C   s~   t jd|j d }x\|j|sptj|}|rftj|}|rXt| |\}}|jj| qnt	d| qt
| }qW t
| }|S )z4Read the nested attributes of a relational attributez^@[Ee][Nn][Dd]\s*z\s*$zError parsing line %s)r   r   r   r(   r_headerliner   r   rh   rr   r,   r   )ofileZrelational_attributer    Zr_end_relationalr.   isattrrw   r
   r
   r   r   V  s    

r   c             C   s   t | }xtj|rt | }q
W d}g }x|tj|stj|}|rtj|}|rjt| |\}}|j| qtj|}|r|j	d}nt
d| t | }q*t | }q*W ||fS )z&Read the header of the iterable ofile.Nr&   zError parsing line %s)r   	r_commentr(   
r_datametar   r   r   rr   
r_relationr*   r,   )r   r    Zrelationrh   r.   r   rw   Zisrelr
   r
   r   read_headerl  s&    



r   c               @   s@   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dS )r   a  Small container to keep useful information on a ARFF dataset.

    Knows about attributes names and types.

    Examples
    --------
    ::

        data, meta = loadarff('iris.arff')
        # This will print the attributes names of the iris.arff dataset
        for i in meta:
            print(i)
        # This works too
        meta.names()
        # Getting attribute type
        types = meta.types()

    Methods
    -------
    names
    types

    Notes
    -----
    Also maintains the list of attributes in order, i.e., doing for i in
    meta, where meta is an instance of MetaData, will return the
    different attribute names in the order they were defined.
    c             C   s   || _ tdd |D | _d S )Nc             s   s   | ]}|j |fV  qd S )N)r   )r   rm   r
   r
   r   r!     s    z$MetaData.__init__.<locals>.<genexpr>)r   r   _attributes)r   relrw   r
   r
   r   r     s    zMetaData.__init__c             C   sj   d}|d| j  7 }xR| jD ]H}|d|| j| jf 7 }| j| jrZ|dt| j| j 7 }|d7 }qW |S )N zDataset: %s
z	%s's type is %sz, range is %srj   )r   r   r   r   r4   )r   r7   r    r
   r
   r   __repr__  s    zMetaData.__repr__c             C   s
   t | jS )N)iterr   )r   r
   r
   r   __iter__  s    zMetaData.__iter__c             C   s   | j | }|j|jfS )N)r   r   r   )r   keyrw   r
   r
   r   __getitem__  s    
zMetaData.__getitem__c             C   s
   t | jS )zReturn the list of attribute names.

        Returns
        -------
        attrnames : list of str
            The attribute names.
        )rn   r   )r   r
   r
   r   names  s    zMetaData.namesc                s    fdd j D }|S )zReturn the list of attribute types.

        Returns
        -------
        attr_types : list of str
            The attribute types.
        c                s   g | ]} j | jqS r
   )r   r   )r   r   )r   r
   r   rl     s   z"MetaData.types.<locals>.<listcomp>)r   )r   Z
attr_typesr
   )r   r   types  s    

zMetaData.typesN)
r   r   r	   __doc__r   r   r   r   r   r   r
   r
   r
   r   r     s   

c             C   s:   t | dr| }n
t| d}zt|S || k	r4|j  X dS )a  
    Read an arff file.

    The data is returned as a record array, which can be accessed much like
    a dictionary of NumPy arrays. For example, if one of the attributes is
    called 'pressure', then its first 10 data points can be accessed from the
    ``data`` record array like so: ``data['pressure'][0:10]``


    Parameters
    ----------
    f : file-like or str
       File-like object to read from, or filename to open.

    Returns
    -------
    data : record array
       The data of the arff file, accessible by attribute names.
    meta : `MetaData`
       Contains information about the arff file such as name and
       type of attributes, the relation (name of the dataset), etc.

    Raises
    ------
    ParseArffError
        This is raised if the given file is not ARFF-formatted.
    NotImplementedError
        The ARFF file has an attribute which is not supported yet.

    Notes
    -----

    This function should be able to read most arff files. Not
    implemented functionality include:

    * date type attributes
    * string type attributes

    It can read files with numeric and nominal attributes. It cannot read
    files with sparse data ({} in the file). However, this function can
    read files with missing data (? in the file), representing the data
    points as NaNs.

    Examples
    --------
    >>> from scipy.io import arff
    >>> from io import StringIO
    >>> content = """
    ... @relation foo
    ... @attribute width  numeric
    ... @attribute height numeric
    ... @attribute color  {red,green,blue,yellow,black}
    ... @data
    ... 5.0,3.25,blue
    ... 4.5,3.75,green
    ... 3.0,4.00,red
    ... """
    >>> f = StringIO(content)
    >>> data, meta = arff.loadarff(f)
    >>> data
    array([(5.0, 3.25, 'blue'), (4.5, 3.75, 'green'), (3.0, 4.0, 'red')],
          dtype=[('width', '<f8'), ('height', '<f8'), ('color', '|S6')])
    >>> meta
    Dataset: foo
    	width's type is numeric
    	height's type is numeric
    	color's type is nominal, range is ('red', 'green', 'blue', 'yellow', 'black')

    readrtN)hasattropen	_loadarffclose)fr   r
   r
   r   r     s    F

c       	         s   yt | \} W n6 tk
rF } zdt| }t|W Y d d }~X nX d}x D ]}t|trRd}qRW t| }|r~tdt d
 fdd	}t	|| }t
j|dd	  D }||fS )Nz'Error while parsing header, error was: FTz*String attributes not supported yet, sorryr   c             3   s`   t t}d }xJ| D ]B}tj|stj|r0qt||\ }t fdd|D V  qW d S )Nc                s   g | ]} | j | qS r
   )r   )r   r    )rw   rk   r
   r   rl   a  s    z0_loadarff.<locals>.generator.<locals>.<listcomp>)rn   r   r   r(   r_emptyr)   r+   )Zrow_iterr   rt   ri   ru   )rw   ni)rk   r   	generatorG  s    
z_loadarff.<locals>.generatorc             S   s   g | ]}|j |jfqS r
   )r   r   )r   rm   r
   r
   r   rl   e  s    z_loadarff.<locals>.<listcomp>)r   )r   r,   r4   r   
isinstancerJ   r   NotImplementedErrorr   rn   r   rs   )	r   r   er7   hasstrrm   metar   rG   r
   )rw   r   r   r   (  s"    



r   c             C   s<   | j d | j d  }tj| tj| tj| tj| | fS )Ng      ?r&   )rB   r   rC   rD   rE   rF   )rG   rH   r
   r
   r   basic_statsl  s    r   c             C   sX   |j }|dks|dks|dkrHt|\}}}}td| |||||f  ntt| d S )Nr;   r=   integerz%s,%s,%f,%f,%f,%f)r   r   printr4   )r   tprG   r   minr$   rE   rF   r
   r
   r   print_attributeq  s
    r   c             C   sJ   t | \}}tt|j t|j x |D ]}t||| ||  q*W d S )N)r   r   r   r   rB   r   )filenamerG   r   r    r
   r
   r   	test_wekaz  s
    

r   F__main__r&   )N)3r   rc   collectionsr   Znumpyr   r|   r   __all__r   Zr_metar   r   r   r   r   r   r'   r\   r   r   IOErrorr   r   objectr   r   r:   rJ   rL   rf   rx   r{   r   r)   r   r   r   r   r   r   r   r   r   r   r   Z__test__r   sysargvr   r
   r
   r
   r   <module>   sZ   










K=S32
 DLQD		

