
     i 0                        d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZ ddlmZmZ ddlmZmZmZ  G d	 d
          Z G d d          Zeeef         Zee         Z G d d          ZdS )    )annotations)aliases)dumps)sub)AnyIteratorListTuple   )RE_POSSIBLE_ENCODING_INDICATIONTOO_BIG_SEQUENCE)	iana_nameis_multi_byte_encodingunicode_rangec                     e Zd Z	 	 d0d1dZd2dZd2dZed3d            Zd4dZd4dZ	d5dZ
ed4d            Zed6d            Zed7d            Zed7d            Zed6d            Zed4d             Zed3d!            Zed3d"            Zed3d#            Zed3d$            Zed8d%            Zed9d'            Zed7d(            Zed6d)            Zed6d*            Zd:d;d-Zed<d/            ZdS )=CharsetMatchNpayloadbytesguessed_encodingstrmean_mess_ratiofloathas_sig_or_bombool	languagesCoherenceMatchesdecoded_payload
str | Nonepreemptive_declarationc                    || _         || _        || _        || _        || _        d | _        g | _        d| _        d | _        d | _	        || _
        || _        d S )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string_preemptive_declaration)selfr   r   r   r   r   r   r   s           c/srv/www/vhosts/g4struct/public_html/venv/lib/python3.11/site-packages/charset_normalizer/models.py__init__zCharsetMatch.__init__   se      '.'6,5%315+-,/"-1,0#23I$$$    otherobjectreturnc                    t          |t                    s/t          |t                    rt          |          | j        k    S dS | j        |j        k    o| j        |j        k    S )NF)
isinstancer   r   r   encodingfingerprintr.   r2   s     r/   __eq__zCharsetMatch.__eq__)   s_    %.. 	%%% 9 ''4=885}.X43CuGX3XXr1   c                   t          |t                    st          t          | j        |j        z
            }t          | j        |j        z
            }|dk     r|dk    r| j        |j        k    S |dk     rC|dk    r=t          | j                  t          k    r| j        |j        k     S | j	        |j	        k    S | j        |j        k     S )zQ
        Implemented to make sorted available upon CharsetMatches items.
        g{Gz?g{Gz?)
r6   r   
ValueErrorabschaos	coherencelenr"   r   multi_byte_usage)r.   r2   chaos_differencecoherence_differences       r/   __lt__zCharsetMatch.__lt__0   s     %.. 	"%dj5;&>"?"?&)$.5?*J&K&K d""';d'B'B>EO33$$)=)E)E 4=!!%555zEK//(5+AAAzEK''r1   c                j    dt          t          |                     t          | j                  z  z
  S )Ng      ?)r@   r   rawr.   s    r/   rA   zCharsetMatch.multi_byte_usageF   s&    c#d))nns48}}455r1   c                ^    | j          t          | j        | j        d          | _         | j         S )Nstrict)r,   r   r"   r#   rG   s    r/   __str__zCharsetMatch.__str__J   s)    <t}dnhGGDL|r1   c                (    d| j          d| j         dS )Nz<CharsetMatch 'z' fp(z)>)r7   r8   rG   s    r/   __repr__zCharsetMatch.__repr__P   s     IIIT5EIIIIr1   Nonec                    t          |t                    r|| k    r't          d                    |j                            d |_        | j                            |           d S )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r6   r   r<   format	__class__r,   r(   appendr9   s     r/   add_submatchzCharsetMatch.add_submatchS   sk    %.. 	%4--MTTO    E"""""r1   c                    | j         S N)r#   rG   s    r/   r7   zCharsetMatch.encoding^   s
    ~r1   	list[str]c                    g }t          j                    D ]F\  }}| j        |k    r|                    |           &| j        |k    r|                    |           G|S )z
        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
        )r   itemsr7   rQ   )r.   also_known_asups       r/   encoding_aliaseszCharsetMatch.encoding_aliasesb   sn    
 $&MOO 	( 	(DAq}!!$$Q''''!##$$Q'''r1   c                    | j         S rT   r&   rG   s    r/   bomzCharsetMatch.bomo       ##r1   c                    | j         S rT   r]   rG   s    r/   byte_order_markzCharsetMatch.byte_order_marks   r_   r1   c                $    d | j         D             S )z
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        c                    g | ]
}|d          S )r    ).0es     r/   
<listcomp>z*CharsetMatch.languages.<locals>.<listcomp>}   s    ...!...r1   r%   rG   s    r/   r   zCharsetMatch.languagesw   s     /.do....r1   c                   | j         shd| j        v rdS ddlm}m} t          | j                  r || j                  n || j                  }t          |          dk    sd|v rdS |d         S | j         d         d         S )z
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        asciiEnglishr   )encoding_languagesmb_encoding_languageszLatin BasedUnknown)r%   could_be_from_charsetcharset_normalizer.cdrl   rm   r   r7   r@   )r.   rl   rm   r   s       r/   languagezCharsetMatch.language   s      	  $444 y XWWWWWWW *$-887%%dm444''66  9~~""my&@&@ yQ<q!!$$r1   c                    | j         S rT   )r$   rG   s    r/   r>   zCharsetMatch.chaos   s    $$r1   c                :    | j         sdS | j         d         d         S )Nr!   r   r   rh   rG   s    r/   r?   zCharsetMatch.coherence   s#     	3q!!$$r1   c                4    t          | j        dz  d          S Nd      )ndigits)roundr>   rG   s    r/   percent_chaoszCharsetMatch.percent_chaos   s    TZ#%q1111r1   c                4    t          | j        dz  d          S ru   )ry   r?   rG   s    r/   percent_coherencezCharsetMatch.percent_coherence   s    T^c)15555r1   c                    | j         S )z+
        Original untouched bytes.
        )r"   rG   s    r/   rF   zCharsetMatch.raw   s    
 }r1   list[CharsetMatch]c                    | j         S rT   )r(   rG   s    r/   submatchzCharsetMatch.submatch   s
    |r1   c                2    t          | j                  dk    S Nr   )r@   r(   rG   s    r/   has_submatchzCharsetMatch.has_submatch   s    4<  1$$r1   c                    | j         | j         S d t          |           D             }t          t          d |D                                 | _         | j         S )Nc                ,    g | ]}t          |          S rd   )r   )re   chars     r/   rg   z*CharsetMatch.alphabets.<locals>.<listcomp>   s     ,W,W,WT]4-@-@,W,W,Wr1   c                    h | ]}||S rd   rd   )re   rs     r/   	<setcomp>z)CharsetMatch.alphabets.<locals>.<setcomp>   s    +L+L+L!!+LA+L+L+Lr1   )r'   r   sortedlist)r.   detected_rangess     r/   	alphabetszCharsetMatch.alphabets   s^    +'',W,WSQUYY,W,W,W%d+L+L+L+L+L&M&MNN##r1   c                6    | j         gd | j        D             z   S )z
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        c                    g | ]	}|j         
S rd   )r7   )re   ms     r/   rg   z6CharsetMatch.could_be_from_charset.<locals>.<listcomp>   s    "D"D"D!1:"D"D"Dr1   )r#   r(   rG   s    r/   ro   z"CharsetMatch.could_be_from_charset   s%     "D"Dt|"D"D"DDDr1   utf_8r7   c                <     j          j         |k    r| _         t                     } j        K j                                        dvr0t	          t
           fd|dd         d          }||dd         z   }|                    |d           _         j        S )z
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        Any errors will be simply ignored by the encoder NOT replaced.
        N)zutf-8utf8r   c                &   | j         |                                 d         |                                 d                                      |                                 d         t	          j                                      dd                    S )Nr   r   _-)stringspanreplacegroupsr   r+   )r   r.   s    r/   <lambda>z%CharsetMatch.output.<locals>.<lambda>   sh    ahqvvxx{QVVXXa['@AII

1!$"788@@cJJ  r1   i    r   )countr   )r+   r   r-   lowerr   r   encoder*   )r.   r7   decoded_stringpatched_headers   `   r/   outputzCharsetMatch.output   s    
  (D,AX,M,M$,D! YYN,80668812 2 "%3    #5D5)" " " "0.2G!G#1#8#89#M#MD ##r1   intc                :    t          t          |                     S )z]
        Retrieve a hash fingerprint of the decoded payload, used for deduplication.
        )hashr   rG   s    r/   r8   zCharsetMatch.fingerprint   s    
 CIIr1   )NN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r2   r3   r4   r   )r4   r   r4   r   )r2   r   r4   rM   )r4   rU   r4   r   )r4   r   )r4   r~   )r   )r7   r   r4   r   r4   r   )__name__
__module____qualname__r0   r:   rD   propertyrA   rJ   rL   rR   r7   r[   r^   ra   r   rq   r>   r?   rz   r|   rF   r   r   r   ro   r   r8   rd   r1   r/   r   r      s        '+-1J J J J J8Y Y Y Y( ( ( (, 6 6 6 X6   J J J J	# 	# 	# 	#    X 
 
 
 X
 $ $ $ X$ $ $ $ X$ / / / X/ % % % X%6 % % % X% % % % X%
 2 2 2 X2 6 6 6 X6    X    X % % % X% $ $ $ X$ E E E XE$ $ $ $ $:    X  r1   r   c                  T    e Zd ZdZdddZddZddZddZddZddZ	ddZ
ddZdS )CharsetMatchesz
    Container with every CharsetMatch items ordered by default from most probable to the less one.
    Act like a list(iterable) but does not implements all related methods.
    Nresultslist[CharsetMatch] | Nonec                6    |rt          |          ng | _        d S rT   )r   _results)r.   r   s     r/   r0   zCharsetMatches.__init__   s    ?F,NF7OOOBr1   r4   Iterator[CharsetMatch]c              #  $   K   | j         E d {V  d S rT   r   rG   s    r/   __iter__zCharsetMatches.__iter__   s&      =         r1   item	int | strr   c                    t          |t                    r| j        |         S t          |t                    r't	          |d          }| j        D ]}||j        v r|c S t          )z
        Retrieve a single item either by its position or encoding name (alias may be used here).
        Raise KeyError upon invalid index or encoding not present in results.
        F)r6   r   r   r   r   ro   KeyError)r.   r   results      r/   __getitem__zCharsetMatches.__getitem__   sv    
 dC   	'=&&dC   	"T5))D- " "6777!MMM 8r1   r   c                *    t          | j                  S rT   r@   r   rG   s    r/   __len__zCharsetMatches.__len__  s    4=!!!r1   r   c                2    t          | j                  dk    S r   r   rG   s    r/   __bool__zCharsetMatches.__bool__  s    4=!!A%%r1   rM   c                   t          |t                    s4t          d                    t	          |j                                      t          |j                  t          k     rB| j	        D ]:}|j
        |j
        k    r(|j        |j        k    r|                    |            dS ;| j	                            |           t          | j	                  | _	        dS )z~
        Insert a single match. Will be inserted accordingly to preserve sort.
        Can be inserted as a submatch.
        z-Cannot append instance '{}' to CharsetMatchesN)r6   r   r<   rO   r   rP   r@   rF   r   r   r8   r>   rR   rQ   r   )r.   r   matchs      r/   rQ   zCharsetMatches.append  s    
 $-- 	?FF''    tx==+++  $(888U[DJ=V=V&&t,,,FFT"""t}--r1   CharsetMatch | Nonec                .    | j         sdS | j         d         S )zQ
        Simply return the first match. Strict equivalent to matches[0].
        Nr   r   rG   s    r/   bestzCharsetMatches.best(  s      } 	4}Qr1   c                *    |                                  S )zP
        Redundant method, call the method best(). Kept for BC reasons.
        )r   rG   s    r/   firstzCharsetMatches.first0  s     yy{{r1   rT   )r   r   )r4   r   )r   r   r4   r   r   r   )r   r   r4   rM   )r4   r   )r   r   r   __doc__r0   r   r   r   r   rQ   r   r   rd   r1   r/   r   r      s         
O O O O O! ! ! !   " " " "& & & &. . . .(            r1   r   c                  6    e Zd ZddZedd            ZddZdS )CliDetectionResultpathr   r7   r   r[   rU   alternative_encodingsrq   r   r   r   r>   r   r?   unicode_pathis_preferredc                    || _         |
| _        || _        || _        || _        || _        || _        || _        || _        |	| _	        || _
        d S rT   )r   r   r7   r[   r   rq   r   r   r>   r?   r   )r.   r   r7   r[   r   rq   r   r   r>   r?   r   r   s               r/   r0   zCliDetectionResult.__init__<  s\     	(4$,+;0E"%$-$2!
 )".r1   r4   dict[str, Any]c                    | j         | j        | j        | j        | j        | j        | j        | j        | j        | j	        | j
        dS )Nr   r7   r[   r   rq   r   r   r>   r?   r   r   r   rG   s    r/   __dict__zCliDetectionResult.__dict__V  sO     I $ 5%)%?"1Z - -
 
 	
r1   c                0    t          | j        dd          S )NT   )ensure_asciiindent)r   r   rG   s    r/   to_jsonzCliDetectionResult.to_jsonf  s    T]a@@@@r1   N)r   r   r7   r   r[   rU   r   rU   rq   r   r   rU   r   r   r>   r   r?   r   r   r   r   r   )r4   r   r   )r   r   r   r0   r   r   r   rd   r1   r/   r   r   ;  sb        / / / /4 
 
 
 X
A A A A A Ar1   r   N)
__future__r   encodings.aliasesr   jsonr   rer   typingr   r   r	   r
   constantr   r   utilsr   r   r   r   r   r   r   CoherenceMatchr   r   rd   r1   r/   <module>r      s`   " " " " " " % % % % % %             - - - - - - - - - - - - G G G G G G G G C C C C C C C C C Ce e e e e e e eP@ @ @ @ @ @ @ @F sEz"' ,A ,A ,A ,A ,A ,A ,A ,A ,A ,Ar1   