
     iY                    x   U d dl mZ d dlZd dlmZ d dlmZ ej        dk    rd dlm	Z	 n	 d dl
m	Z	 n# e$ r d Z	Y nw xY wdd	lmZmZmZmZmZmZmZmZmZmZ dd
lmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& eez  ez  ez  ez  Z'de(d<    G d d          Z)e	 G d de)                      Z*e	 G d de)                      Z+e	 G d de)                      Z,e	 G d de)                      Z-e	 G d de)                      Z.e	 G d de)                      Z/e	 G d de)                      Z0e	 G d de)                      Z1e	 G d d e)                      Z2 ed!"          d5d(            Z3 e4d) e)5                                D                       Z6d*e(d+<    ed,"          	 d6d7d4            Z7dS )8    )annotationsN)	lru_cache)	getLogger)      )finalc                    | S N )clss    _/srv/www/vhosts/g4struct/public_html/venv/lib/python3.11/site-packages/charset_normalizer/md.pyr   r      s    J       )
COMMON_SAFE_ASCII_CHARACTERSTRACEUNICODE_SECONDARY_RANGE_KEYWORD_ACCENTUATED_CJK_HANGUL	_HIRAGANA	_KATAKANA_LATIN_THAI)_character_flagsis_accentuated	is_arabicis_arabic_isolated_formis_case_variableis_cjkis_emoticonis_latinis_punctuationis_separator	is_symbolis_unprintableremove_accentunicode_rangeis_cjk_uncommonint_GLYPH_MASKc                  F    e Zd ZdZdZddZdd	Zdd
Zedd            Z	dS )MessDetectorPluginzy
    Base abstract class used for mess detection plugins.
    All detectors MUST extend and implement given methods.
    r   	characterstrreturnboolc                    t           )z@
        Determine if given character should be fed in.
        NotImplementedErrorselfr-   s     r   eligiblezMessDetectorPlugin.eligible<   
     "!r   Nonec                    t           )z
        The main routine to be executed upon character.
        Insert the logic in witch the text would be considered chaotic.
        r2   r4   s     r   feedzMessDetectorPlugin.feedB   s
    
 "!r   c                    t           )zB
        Permit to reset the plugin to the initial state.
        r2   r5   s    r   resetzMessDetectorPlugin.resetI   r7   r   floatc                    t           )z
        Compute the chaos ratio based on what your feed() has seen.
        Must NOT be lower than 0.; No restriction gt 0.
        r2   r<   s    r   ratiozMessDetectorPlugin.ratioO   s
     "!r   Nr-   r.   r/   r0   r-   r.   r/   r8   r/   r8   r/   r>   )
__name__
__module____qualname____doc__	__slots__r6   r:   r=   propertyr@   r   r   r   r,   r,   4   s         
 I" " " "" " " "" " " " " " " X" " "r   r,   c                  J    e Zd ZdZddZddZdd	Zdd
Zedd            Z	dS ) TooManySymbolOrPunctuationPlugin_punctuation_count_symbol_count_character_count_last_printable_char_frenzy_symbol_in_wordr/   r8   c                L    d| _         d| _        d| _        d | _        d| _        d S Nr   FrM   r<   s    r   __init__z)TooManySymbolOrPunctuationPlugin.__init__b   s0    '("#%&04!,1###r   r-   r.   r0   c                *    |                                 S r
   isprintabler4   s     r   r6   z)TooManySymbolOrPunctuationPlugin.eligiblej       $$&&&r   c                    | xj         dz  c_         || j        k    rk|t          vrbt          |          r| xj        dz  c_        nB|                                s.t          |          rt          |          s| xj        dz  c_        || _        d S )Nr      )	rP   rQ   r   r"   rN   isdigitr$   r    rO   r4   s     r   r:   z%TooManySymbolOrPunctuationPlugin.feedm   s    " 222!===i(( (''1,'''%%''(i((( $I..(
 ""a'""$-!!!r   c                0    d| _         d| _        d| _        d S Nr   )rN   rP   rO   r<   s    r   r=   z&TooManySymbolOrPunctuationPlugin.reset   s     "# !r   r>   c                ^    | j         dk    rdS | j        | j        z   | j         z  }|dk    r|ndS )Nr           333333?)rP   rN   rO   )r5   ratio_of_punctuations     r   r@   z&TooManySymbolOrPunctuationPlugin.ratio   sK     A%%3 #d&88!'" (<s'B'B##Kr   NrC   rA   rB   rD   
rE   rF   rG   rI   rU   r6   r:   r=   rJ   r@   r   r   r   rL   rL   X   s        I2 2 2 2' ' ' '. . . .$   
 L L L XL L Lr   rL   c                  J    e Zd ZdZddZddZdd	Zdd
Zedd            Z	dS )TooManyAccentuatedPluginrP   _accentuated_countr/   r8   c                "    d| _         d| _        d S r^   rf   r<   s    r   rU   z!TooManyAccentuatedPlugin.__init__   s    %&'(r   r-   r.   r0   c                *    |                                 S r
   )isalphar4   s     r   r6   z!TooManyAccentuatedPlugin.eligible   s      """r   c                h    | xj         dz  c_         t          |          r| xj        dz  c_        d S d S Nr   )rP   r   rg   r4   s     r   r:   zTooManyAccentuatedPlugin.feed   sJ    ")$$ 	)##q(####	) 	)r   c                "    d| _         d| _        d S r^   rf   r<   s    r   r=   zTooManyAccentuatedPlugin.reset   s     !"#r   r>   c                N    | j         dk     rdS | j        | j         z  }|dk    r|ndS )Nr   r`   gffffff?rf   )r5   ratio_of_accentuations     r   r@   zTooManyAccentuatedPlugin.ratio   s<     1$$3'+'>AV'V(=(E(E$$3Nr   NrC   rA   rB   rD   rc   r   r   r   re   re      s        :I) ) ) )# # # #) ) ) )$ $ $ $ O O O XO O Or   re   c                  J    e Zd ZdZddZddZdd	Zdd
Zedd            Z	dS )UnprintablePlugin_unprintable_countrP   r/   r8   c                "    d| _         d| _        d S r^   rr   r<   s    r   rU   zUnprintablePlugin.__init__   s    '(%&r   r-   r.   r0   c                    dS NTr   r4   s     r   r6   zUnprintablePlugin.eligible       tr   c                d    t          |          r| xj        dz  c_        | xj        dz  c_        d S rl   )r%   rs   rP   r4   s     r   r:   zUnprintablePlugin.feed   s@    )$$ 	)##q(##"r   c                    d| _         d S r^   )rs   r<   s    r   r=   zUnprintablePlugin.reset   s    "#r   r>   c                @    | j         dk    rdS | j        dz  | j         z  S )Nr   r`   r   )rP   rs   r<   s    r   r@   zUnprintablePlugin.ratio   s+     A%%3'!+t/DDDr   NrC   rA   rB   rD   rc   r   r   r   rq   rq      s        :I' ' ' '   # # # #
$ $ $ $ E E E XE E Er   rq   c                  J    e Zd ZdZddZddZdd	Zdd
Zedd            Z	dS )SuspiciousDuplicateAccentPlugin_successive_countrP   _last_latin_character_last_was_accentuatedr/   r8   c                >    d| _         d| _        d | _        d| _        d S rT   r}   r<   s    r   rU   z(SuspiciousDuplicateAccentPlugin.__init__   s(    &'%&15"+0"""r   r-   r.   r0   c                H    |                                 ot          |          S r
   )rj   r!   r4   s     r   r6   z(SuspiciousDuplicateAccentPlugin.eligible   s!      "":x	':'::r   c                d   | xj         dz  c_         t          |          }| j        {|ry| j        rr|                                r)| j                                        r| xj        dz  c_        t          |          t          | j                  k    r| xj        dz  c_        || _        || _        d S rl   )rP   r   r   r   isupperr~   r&   )r5   r-   current_accentuateds      r   r:   z$SuspiciousDuplicateAccentPlugin.feed   s    "$29$=$=&2# 3* 3   "" ,t'A'I'I'K'K ,&&!+&&Y''=9S+T+TTT&&!+&&%."%8"""r   c                >    d| _         d| _        d | _        d| _        d S rT   r}   r<   s    r   r=   z%SuspiciousDuplicateAccentPlugin.reset   s(    !" !%)"%*"""r   r>   c                @    | j         dk    rdS | j        dz  | j         z  S )Nr   r`   r[   )rP   r~   r<   s    r   r@   z%SuspiciousDuplicateAccentPlugin.ratio   s+     A%%3&*d.CCCr   NrC   rA   rB   rD   rc   r   r   r   r|   r|      s        I1 1 1 1; ; ; ;9 9 9 9 + + + + D D D XD D Dr   r|   c                  J    e Zd ZdZddZddZdd	Zdd
Zedd            Z	dS )SuspiciousRange"_suspicious_successive_range_countrP   _last_printable_seen_last_printable_ranger/   r8   c                >    d| _         d| _        d | _        d | _        d S r^   r   r<   s    r   rU   zSuspiciousRange.__init__  s(    78/%&04!15"""r   r-   r.   r0   c                *    |                                 S r
   rW   r4   s     r   r6   zSuspiciousRange.eligible	  rY   r   c                n   | xj         dz  c_         |                                st          |          s	|t          v rd | _        d | _        d S | j        || _        t          |          | _        d S | j        }t          |          }t          ||          r| xj        dz  c_        || _        || _        d S rl   )	rP   isspacer"   r   r   r   r'    is_suspiciously_successive_ranger   )r5   r-   unicode_range_aunicode_range_bs       r   r:   zSuspiciousRange.feed  s    " 	i((	 888(,D%)-D&F$,(1D%)6y)A)AD&F&*&@&3I&>&>+O_MM 	933q833$-!%4"""r   c                >    d| _         d| _        d | _        d | _        d S r^   )rP   r   r   r   r<   s    r   r=   zSuspiciousRange.reset&  s(     !23/$(!%)"""r   r>   c                D    | j         dk    rdS | j        dz  | j         z  }|S )N   r`   r[   )rP   r   )r5   ratio_of_suspicious_range_usages     r   r@   zSuspiciousRange.ratio,  s8     B&&3 3a7!2"' /.r   NrC   rA   rB   rD   rc   r   r   r   r   r      s        I6 6 6 6' ' ' '5 5 5 54* * * * / / / X/ / /r   r   c                  J    e Zd ZdZddZddZdd	Zdd
Zedd            Z	dS )SuperWeirdWordPlugin_word_count_bad_word_count_foreign_long_count_is_current_word_bad_foreign_long_watchrP   _bad_character_count_buffer_length_buffer_last_char_buffer_last_char_accentuated_buffer_accent_count_buffer_glyph_count_buffer_upper_countr/   r8   c                    d| _         d| _        d| _        d| _        d| _        d| _        d| _        d| _        d | _        d| _	        d| _
        d| _        d| _        d S rT   r   r<   s    r   rU   zSuperWeirdWordPlugin.__init__J  sp     !$%() */!). %&)*!#$-138*)*!() ()   r   r-   r.   r0   c                    dS rv   r   r4   s     r   r6   zSuperWeirdWordPlugin.eligible\  rw   r   c                ^   |                                 r| xj        dz  c_        || _        |                                r| xj        dz  c_        t          |          }t          |t          z            }|| _        |r| xj	        dz  c_	        | j
        s|t          z  r|r|t          z  sd| _
        |t          z  r| xj        dz  c_        d S | j        sd S |                                st          |          st!          |          rP| j        rH| xj        dz  c_        | j        }| xj        |z  c_        |dk    r{| j	        |z  dk    rd| _        ne| j        r<| j                                        r#| j        |k    r| xj        dz  c_        d| _        n"| j        dk    rd| _        | xj        dz  c_        |dk    r9| j
        r2| j        dk    o| j        |z  dk    }|s| xj        dz  c_        d| _        | j        r'| xj        dz  c_        | xj        |z  c_        d| _        d| _
        d| _        d | _        d| _        d| _	        d| _        d| _        d S |d	vrJ|                                s8t1          |          r+d| _        | xj        dz  c_        || _        d| _        d S d S d S d S )
Nr   T         ?   r   ra   F>   _-<=>|~)rj   r   r   r   r   r   r0   r   r   r   r   r   r*   r   r   r"   r#   r   rP   r   r   r   r   r\   r$   )r5   r-   flagschar_accentuatedbuffer_lengthprobable_camel_caseds         r   r:   zSuperWeirdWordPlugin.feed_  sv    	1$%.D"  "" .((A-(())44E%)%,*>%?%?1AD. /))Q.)),0&.0-=0 ,0
 ,0({" .((A-((F" 	F5	7#1)#<#<5	7@LY@W@W5	7!5	7 !!%!4M!!]2!!!!,}<CC04D-- 6	2.6688	2 0MAA,,1,,04D---2204D-,,1,,""t'?",q0 H0=@CG %
 , 5,,1,,04D-( 2$$)$$))]:)),1)',D$"#D%)D"16D.()D%'(D$'(D$$$@@@%%'' A)$$ A )-D%1$%.D"16D... A@@@@@r   c                    d| _         d | _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _	        d| _
        d| _        d| _        d S rT   )r   r   r   r   r   r   r   rP   r   r   r   r   r   r<   s    r   r=   zSuperWeirdWordPlugin.reset  sp    !%-2*$)!#(   !$%!#$ $%!#$ #$   r   r>   c                P    | j         dk    r| j        dk    rdS | j        | j        z  S )N
   r   r`   )r   r   r   rP   r<   s    r   r@   zSuperWeirdWordPlugin.ratio  s3    r!!d&>!&C&C3(4+@@@r   NrC   rA   rB   rD   rc   r   r   r   r   r   8  s        I * * * *$   N7 N7 N7 N7`% % % % A A A XA A Ar   r   c                  N    e Zd ZdZdZddZdd	Zdd
ZddZe	dd            Z
dS )CjkUncommonPluginz<
    Detect messy CJK text that probably means nothing.
    rP   _uncommon_countr/   r8   c                "    d| _         d| _        d S r^   r   r<   s    r   rU   zCjkUncommonPlugin.__init__  s    %&$%r   r-   r.   r0   c                     t          |          S r
   )r   r4   s     r   r6   zCjkUncommonPlugin.eligible  s    i   r   c                h    | xj         dz  c_         t          |          r| xj        dz  c_        d S d S rl   )rP   r(   r   r4   s     r   r:   zCjkUncommonPlugin.feed  sI    "9%% 	  A%  F	 	r   c                "    d| _         d| _        d S r^   r   r<   s    r   r=   zCjkUncommonPlugin.reset  s     ! r   r>   c                T    | j         dk     rdS | j        | j         z  }|dk    r|dz  ndS )Nr   r`   r   r   r   )r5   uncommon_form_usages     r   r@   zCjkUncommonPlugin.ratio  sC     1$$3%)%9D<Q%Q ,?+D+D"R''#Mr   NrC   rA   rB   rD   )rE   rF   rG   rH   rI   rU   r6   r:   r=   rJ   r@   r   r   r   r   r     s          8I& & & &! ! ! !   ! ! ! ! N N N XN N Nr   r   c                  J    e Zd ZdZddZddZdd	Zdd
Zedd            Z	dS )ArchaicUpperLowerPlugin_buf_character_count_since_last_sep_successive_upper_lower_count#_successive_upper_lower_count_finalrP   _last_alpha_seen_current_ascii_onlyr/   r8   c                h    d| _         d| _        d| _        d| _        d| _        d | _        d| _        d S )NFr   Tr   r<   s    r   rU   z ArchaicUpperLowerPlugin.__init__  s?    	45,23*890%&,0)-   r   r-   r.   r0   c                    dS rv   r   r4   s     r   r6   z ArchaicUpperLowerPlugin.eligible  rw   r   c                   |                                 ot          |          }| }|r{| j        dk    rp| j        dk    r0|                                s| j        s| xj        | j        z  c_        d| _        d| _        d | _        d| _        | xj	        dz  c_	        d| _        d S | j        r|
                                sd| _        | j        |                                r| j                                        s-|                                r@| j                                        r'| j        r| xj        dz  c_        d| _        nd| _        nd| _        | xj	        dz  c_	        | xj        dz  c_        || _        d S )Nr   @   Fr   Tr[   )rj   r   r   r\   r   r   r   r   r   rP   isasciir   islower)r5   r-   is_concerned	chunk_seps       r   r:   zArchaicUpperLowerPlugin.feed  s   &..00P5Ei5P5P**	 	=AA4::!))++ ;0 ; 88688 23D.34D0$(D!DI!!Q&!!'+D$F# 	-I,=,=,?,? 	-',D$ ,!!## 	"(=(E(E(G(G 	"!!##	"(,(=(E(E(G(G	" 9 %66!;66 %DII $DII!	",,1,, )r   c                h    d| _         d| _        d| _        d| _        d | _        d| _        d| _        d S )Nr   FT)rP   r   r   r   r   r   r   r<   s    r   r=   zArchaicUpperLowerPlugin.reset2  s?     !/0,-.*340 $	#'   r   r>   c                :    | j         dk    rdS | j        | j         z  S )Nr   r`   )rP   r   r<   s    r   r@   zArchaicUpperLowerPlugin.ratio;  s&     A%%37$:OOOr   NrC   rA   rB   rD   rc   r   r   r   r   r     s        I. . . .   (* (* (* (*T( ( ( ( P P P XP P Pr   r   c                  J    e Zd ZdZddZddZdd	Zdd
Zedd            Z	dS )ArabicIsolatedFormPluginrP   _isolated_form_countr/   r8   c                "    d| _         d| _        d S r^   r   r<   s    r   rU   z!ArabicIsolatedFormPlugin.__init__G  s    %&)*!!!r   c                "    d| _         d| _        d S r^   r   r<   s    r   r=   zArabicIsolatedFormPlugin.resetK  s     !$%!!!r   r-   r.   r0   c                     t          |          S r
   )r   r4   s     r   r6   z!ArabicIsolatedFormPlugin.eligibleO  s    ###r   c                h    | xj         dz  c_         t          |          r| xj        dz  c_        d S d S rl   )rP   r   r   r4   s     r   r:   zArabicIsolatedFormPlugin.feedR  sJ    ""9-- 	+%%*%%%%	+ 	+r   r>   c                >    | j         dk     rdS | j        | j         z  }|S )Nr   r`   r   )r5   isolated_form_usages     r   r@   zArabicIsolatedFormPlugin.ratioX  s,     1$$3%)%>AV%V""r   NrC   rA   rB   rD   )
rE   rF   rG   rI   rU   r=   r6   r:   rJ   r@   r   r   r   r   r   C  s        <I+ + + +& & & &$ $ $ $+ + + + # # # X# # #r   r      )maxsizer   
str | Noner   r/   r0   c                   | |dS | |k    rdS d| v rd|v rdS d| v sd|v rdS d| v sd|v r
d| v sd|v rdS |                      d          |                     d          }}|D ]}|t          v r||v r dS | dv |dv }}|s|r
d	| v sd	|v rdS |r|rdS d
| v sd
|v rd	| v sd	|v rdS | dk    s|dk    rdS d	| v sd	|v s| dv r&|dv r"d| v sd|v rdS d| v sd|v rdS | dk    s|dk    rdS dS )za
    Determine if two Unicode range seen next to each other can be considered as suspicious.
    NTFLatin	Emoticons	Combining )HiraganaKatakanaCJKHangulzBasic Latin)r   r   PunctuationForms)splitr   )r   r   keywords_range_akeywords_range_belrange_a_jp_charsrange_b_jp_charss          r   r   r   b  s    /"9t/))u/!!g&@&@uo%%)G)Gu 	?""g&@&@&&+*H*Hu 	c""c"" '
   000!!!55 "
 	
	

 	33 ' 	 ,   E_$<$<u , u?""h/&A&AO##u'?'?5m++-/O/O5 	  E_$<$<333777O++}/O/O5o%%O)C)C5m++-/O/O54r   c              #     K   | ]}|V  d S r
   r   .0md_classs     r   	<genexpr>r     s;       @ @H@ @ @ @ @ @r   z$tuple[type[MessDetectorPlugin], ...]_DETECTOR_CLASSESi   皙?Fdecoded_sequencer.   maximum_thresholdr>   debugc           	     n   d t           D             }t          |           }|dk     rd}n|dk     rd}nd}t          d||          D ]b}| |||z            D ]1}|D ],}|                    |          r|                    |           -2t          d |D                       }	|	|k    r nIc|D ],}|                    d	          r|                    d	           -t          d
 |D                       }	|rt          d          }
|
                    t          d| d|	 d|            |dk    rL|
                    t          d| dd                     |
                    t          d| dd                     |D ],}|
                    t          |j	         d|j
                    -t          |	d          S )zw
    Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
    c                "    g | ]} |            S r   r   r   s     r   
<listcomp>zmess_ratio.<locals>.<listcomp>  s    *X*X*X(88::*X*X*Xr   i      r   r      r   c              3  $   K   | ]}|j         V  d S r
   r@   r   dts     r   r   zmess_ratio.<locals>.<genexpr>  $      ;;2bh;;;;;;r   
c              3  $   K   | ]}|j         V  d S r
   r  r  s     r   r   zmess_ratio.<locals>.<genexpr>  r
  r   charset_normalizerzIMess-detector extended-analysis start. intermediary_mean_mess_ratio_calc=z mean_mess_ratio=z maximum_threshold=   zStarting with: NzEnding with: iz: r   )r   lenranger6   r:   sumr   logr   	__class__r@   round)r   r   r   	detectorsseq_lenstepblock_startr-   detectormean_mess_ratiologgerr	  s               r   
mess_ratior    sJ    +Y*XFW*X*X*XI '((G}}	4Q.. < <)+d8J*JK 	- 	-I% - -$$Y// -MM),,,- ;;;;;;;///E 0 " 	$ 	$H  && $d###;;;;;;; =/00

5155 5HW5 5!25 5	
 	
 	
 R<<JJuG0@"0EGGHHHJJuG.>suu.EGGHHH 	= 	=BJJu;;;;<<<<!$$$r   )r   r   r   r   r/   r0   )r   F)r   r.   r   r>   r   r0   r/   r>   )8
__future__r   sys	functoolsr   loggingr   version_infotypingr   typing_extensionsImportErrorconstantr   r   r   r   r   r   r   r   r   r   utilsr   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r*   __annotations__r,   rL   re   rq   r|   r   r   r   r   r   r   tuple__subclasses__r   r  r   r   r   <module>r*     s/   " " " " " " " 



            v+++++++   	 	 	 	 	                                                        & '>I-	9EA A A A A!" !" !" !" !" !" !" !"H 4L 4L 4L 4L 4L'9 4L 4L 4Ln O O O O O1 O O O: E E E E E* E E E4 -D -D -D -D -D&8 -D -D -D` :/ :/ :/ :/ :/( :/ :/ :/z JA JA JA JA JA- JA JA JAZ "N "N "N "N "N* "N "N "NJ SP SP SP SP SP0 SP SP SPl # # # # #1 # # #< 4F F F FT ;@% @ @/>>@@@ @ @ ; ;     
 4IN5% 5% 5% 5% 5% 5% 5%s   2 ==