3
d                 @   s  d Z dZddlZddlmZ ddlmZ ddlZddlmZm	Z	 ddl
mZ ddlZddlZddlZddlZddlZddlZddlZddlZdd	 Zd#ddZG dd deZdd ZdZdZd$ddZd%ddZd&ddZd'ddZd(d d!Zed"kreejj   dS ))z=Diagnostic functions, mainly for use when doing tech support.MIT    N)BytesIO)
HTMLParser)BeautifulSoup__version__)builder_registryc       
   .   C   s  t dt  t dtj  dddg}x>|D ]6}x0tjD ]}||jkr6P q6W |j| t d|  q*W d|kr|jd y*dd	l	m
} t d
djtt|j  W n* tk
r } zt d W Y dd}~X nX d|kryddl}t d|j  W n, tk
r } zt d W Y dd}~X nX t| dr2| j } x|D ]}t d|  d}yt| |d}	d}W n8 tk
r } zt d|  tj  W Y dd}~X nX |rt d|  t |	j  t dd  q8W dS )zDiagnostic suite for isolating common problems.

    :param data: A string containing markup that needs to be explained.
    :return: None; diagnostics are printed to standard output.
    z'Diagnostic running on Beautiful Soup %szPython version %szhtml.parserhtml5liblxmlz;I noticed that %s is not installed. Installing it may help.zlxml-xmlr   )etreezFound lxml version %s.z.lxml is not installed or couldn't be imported.NzFound html5lib version %sz2html5lib is not installed or couldn't be imported.readz#Trying to parse your markup with %sF)featuresTz%s could not parse the markup.z#Here's what %s did with the markup:-P   )printr   sysversionr   Zbuildersr   removeappendr	   r
   joinmapstrZLXML_VERSIONImportErrorr   hasattrr   r   	Exception	traceback	print_excZprettify)
dataZbasic_parsersnameZbuilderr
   er   parsersuccesssoup r#   6/tmp/pip-build-8z3xcdsh/beautifulsoup4/bs4/diagnose.pydiagnose   sR    






r%   Tc             K   st   ddl m} |jdd}t| tr,| jd} t| }x:|j|f||d|D ]\}}td||j	|j
f  qNW dS )	a  Print out the lxml events that occur during parsing.

    This lets you see how lxml parses a document when no Beautiful
    Soup code is running. You can use this to determine whether
    an lxml-specific problem is in Beautiful Soup's lxml tree builders
    or in lxml itself.

    :param data: Some markup.
    :param html: If True, markup will be parsed with lxml's HTML parser.
       if False, lxml's XML parser will be used.
    r   )r
   recoverTutf8)htmlr&   z%s, %4s, %sN)r	   r
   pop
isinstancer   encoder   	iterparser   tagtext)r   r(   kwargsr
   r&   readereventelementr#   r#   r$   
lxml_traceN   s    

r3   c               @   s`   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd ZdS )AnnouncingParserzSubclass of HTMLParser that announces parse events, without doing
    anything else.

    You can use this to get a picture of how html.parser sees a given
    document. The easiest way to do this is to call `htmlparser_trace`.
    c             C   s   t | d S )N)r   )selfsr#   r#   r$   _pl   s    zAnnouncingParser._pc             C   s   | j d|  d S )Nz%s START)r7   )r5   r   attrsr#   r#   r$   handle_starttago   s    z AnnouncingParser.handle_starttagc             C   s   | j d|  d S )Nz%s END)r7   )r5   r   r#   r#   r$   handle_endtagr   s    zAnnouncingParser.handle_endtagc             C   s   | j d|  d S )Nz%s DATA)r7   )r5   r   r#   r#   r$   handle_datau   s    zAnnouncingParser.handle_datac             C   s   | j d|  d S )Nz
%s CHARREF)r7   )r5   r   r#   r#   r$   handle_charrefx   s    zAnnouncingParser.handle_charrefc             C   s   | j d|  d S )Nz%s ENTITYREF)r7   )r5   r   r#   r#   r$   handle_entityref{   s    z!AnnouncingParser.handle_entityrefc             C   s   | j d|  d S )Nz
%s COMMENT)r7   )r5   r   r#   r#   r$   handle_comment~   s    zAnnouncingParser.handle_commentc             C   s   | j d|  d S )Nz%s DECL)r7   )r5   r   r#   r#   r$   handle_decl   s    zAnnouncingParser.handle_declc             C   s   | j d|  d S )Nz%s UNKNOWN-DECL)r7   )r5   r   r#   r#   r$   unknown_decl   s    zAnnouncingParser.unknown_declc             C   s   | j d|  d S )Nz%s PI)r7   )r5   r   r#   r#   r$   	handle_pi   s    zAnnouncingParser.handle_piN)__name__
__module____qualname____doc__r7   r9   r:   r;   r<   r=   r>   r?   r@   rA   r#   r#   r#   r$   r4   d   s   r4   c             C   s   t  }|j|  dS )zPrint out the HTMLParser events that occur during parsing.

    This lets you see how HTMLParser parses a document when no
    Beautiful Soup code is running.

    :param data: Some markup.
    N)r4   feed)r   r    r#   r#   r$   htmlparser_trace   s    rG   ZaeiouZbcdfghjklmnpqrstvwxyz   c             C   s>   d}x4t | D ](}|d dkr$t}nt}|tj|7 }qW |S )z#Generate a random word-like string.    r   )range_consonants_vowelsrandomchoice)lengthr6   itr#   r#   r$   rword   s    rS      c             C   s   dj dd t| D S )z'Generate a random sentence-like string. c             s   s   | ]}t tjd dV  qdS )rT   	   N)rS   rN   randint).0rQ   r#   r#   r$   	<genexpr>   s    zrsentence.<locals>.<genexpr>)r   rK   )rP   r#   r#   r$   	rsentence   s    rZ     c             C   s   dddddddg}g }x~t | D ]r}tjdd	}|dkrRtj|}|jd
|  q |dkrr|jttjdd q |dkr tj|}|jd|  q W ddj| d S )z+Randomly generate an invalid HTML document.pdivspanrQ   bscripttabler      z<%s>   rT   rJ   z</%s>z<html>
z</html>)rK   rN   rW   rO   r   rZ   r   )num_elementsZ	tag_nameselementsrQ   rO   Ztag_namer#   r#   r$   rdoc   s    

rg   順 c       
      C   s(  t dt  t| }t dt|  xdddgddgD ]z}d}y"tj }t||}tj }d}W n6 tk
r } zt d	|  tj  W Y d
d
}~X nX |r6t d||| f  q6W ddl	m
} tj }|j| tj }t d||   dd
l}	|	j }tj }|j| tj }t d||   d
S )z.Very basic head-to-head performance benchmark.z1Comparative parser benchmark on Beautiful Soup %sz3Generated a large invalid HTML document (%d bytes).r	   r(   r   zhtml.parserFTz%s could not parse the markup.Nz"BS4+%s parsed the markup in %.2fs.r   )r
   z$Raw lxml parsed the markup in %.2fs.z(Raw html5lib parsed the markup in %.2fs.)r   r   rg   lentimer   r   r   r   r	   r
   ZHTMLr   r   parse)
re   r   r    r!   ar"   r_   r   r
   r   r#   r#   r$   benchmark_parsers   s4    


rm   r	   c             C   sX   t j }|j}t| }tt||d}tjd||| tj	|}|j
d |jdd dS )z7Use Python's profiler on a randomly generated document.)bs4r   r    zbs4.BeautifulSoup(data, parser)Z
cumulativez_html5lib|bs42   N)tempfileNamedTemporaryFiler   rg   dictrn   cProfileZrunctxpstatsZStatsZ
sort_statsZprint_stats)re   r    Z
filehandlefilenamer   varsstatsr#   r#   r$   profile   s    

rx   __main__)T)rH   )rT   )r[   )rh   )rh   r	   )!rE   __license__rs   ior   html.parserr   rn   r   r   Zbs4.builderr   osrt   rN   rp   rj   r   r   r%   r3   r4   rG   rM   rL   rS   rZ   rg   rm   rx   rB   stdinr   r#   r#   r#   r$   <module>   s8   8
&



 

