
    ͚h9                         d Z ddlZddlmZmZmZmZ ddlmZm	Z	 ddl
mZ ddlmZ ddlZddlmZ ddlZ G d d	          Z G d
 d          Z G d d          Zddee         defdZ G d d          ZdS )z
Cookie Banner Manager - Automated Cookie Consent Detection and Interaction

This module provides automated detection and interaction with cookie consent banners,
focusing on identifying and clicking appropriate buttons to accept cookies.
    N)LiteralOptionalDictList)PageElementHandle)BeautifulSoup)datetime)find_near_matchesc                   $    e Zd ZdZd Zd Zd ZdS )ChoiceButtonzA
    Structured choice button for clarity and extensibility.
    c                 >    || _         || _        || _        || _        d S N)elementselectortextscore)selfr   r   r   r   s        K/home/byschii/byschiidev/penelope/lib/highlevel/cookie_banner_manager_v2.py__init__zChoiceButton.__init__   s"     	


    c                 (    d| j         d| j        dS )NzCB(text=z, score=))r   r   r   s    r   __repr__zChoiceButton.__repr__   s    >$)>>tz>>>>r   c                 ,    | j         | j        | j        dS )Nr   r   r   r   r   s    r   __dict__zChoiceButton.__dict__    s     IZ
 
 	
r   N)__name__
__module____qualname____doc__r   r   r    r   r   r   r      sK           ? ? ?
 
 
 
 
r   r   c                   .    e Zd ZdededefdZdefdZdS )
TextScoresr   
text_score
htlm_scorec                 0    || _         || _        || _        d S r   )r   r&   
html_score)r   r   r&   r'   s       r   r   zTextScores.__init__(   s    	$$r   returnc                      | j         | j        z   S r   )r&   r)   r   s    r   r   zTextScores.score-   s    00r   N)r   r    r!   strfloatr   r   r#   r   r   r%   r%   '   sX        %S %e % % % % %
1u 1 1 1 1 1 1r   r%   c            	           e Zd ZdZdedefdZdefdZdefdZ	d Z
d	 Zd
 Zedded          deded          fd            ZdS )	Candidatezp
    Represents a candidate cookie banner element, with structured accept/reject buttons and serialization.
    r   visual_scorec                 Z    || _         || _        || _        g | _        g | _        d | _        d S r   )r   r   r0   accept_buttonsreject_buttonsr&   )r   r   r   r0   s       r   r   zCandidate.__init__5   s3     (  15r   buttonc                 :    | j                             |           d S r   )r2   appendr   r4   s     r   add_accept_buttonzCandidate.add_accept_button=       ""6*****r   c                 :    | j                             |           d S r   )r3   r6   r7   s     r   add_reject_buttonzCandidate.add_reject_button@   r9   r   c                     t          d | j        D                       }t          d | j        D                       }| j        r| j                                        nd}| j        |z   |z   |z   S )Nc              3   $   K   | ]}|j         V  d S r   r   .0btns     r   	<genexpr>z"Candidate.score.<locals>.<genexpr>D   $      "L"L39"L"L"L"L"L"Lr   c              3   $   K   | ]}|j         V  d S r   r>   r?   s     r   rB   z"Candidate.score.<locals>.<genexpr>E   rC   r   r   )sumr2   r3   r&   r   r0   )r   accept_buttons_scorereject_buttons_score_text_scores       r   r   zCandidate.scoreC   s|    ""L"L8K"L"L"LLL""L"L8K"L"L"LLL15Gdo++---a $88;OOR]]]r   c                 D    t          |                                           S r   )r,   r   r   s    r   r   zCandidate.__repr__I   s    4==??$$$r   c                     | j         | j        d | j        D             d | j        D             | j        r| j        j        dd| j        j        dnd dS )Nc                 6    g | ]}|                                 S r#   r   r?   s     r   
<listcomp>z&Candidate.__dict__.<locals>.<listcomp>Q        MMM#s||~~MMMr   c                 6    g | ]}|                                 S r#   rL   r?   s     r   rM   z&Candidate.__dict__.<locals>.<listcomp>R   rN   r   2fz -- )r   r0   r2   r3   r&   )r   r0   r2   r3   r&   r)   r   s    r   r   zCandidate.__dict__L   su      -MM9LMMMMM9LMMMcgcr|T_7___@Z____x|
 
 	
r   皙?
candidatestop_fractionr*   c                     | sg S |                      d d           t          dt          t          |           |z                      }t	          | d|dz                      S )a}  
        Sorts candidates by their score and trims the list to the top fraction.
        
        Args:
            candidates (List[Candidate]): List of Candidate objects to sort and trim.
            top_fraction (float): Fraction of top candidates to retain (between 0 and 1).

        Returns:
            List[Candidate]: Sorted and trimmed list of Candidate objects.
        c                 *    |                                  S r   r>   cs    r   <lambda>z4Candidate.sort_and_trim_candidates.<locals>.<lambda>f   s    aggii r   Tkeyreverse   N)sortmaxintlenlist)rR   rS   top_ns      r   sort_and_trim_candidatesz"Candidate.sort_and_trim_candidatesV   sn      	I 	//>>> As3z??\9::;; Jxax()))r   N)rQ   )r   r    r!   r"   r,   r-   r   r   r8   r;   r   r   r   staticmethodr   rc   r#   r   r   r/   r/   0   s         6 65 6 6 6 6+ + + + ++ + + + +^ ^ ^% % %
 
 
 * *T+-> *e *^bcn^o * * * \* * *r   r/   banner candidatesrR   beginc                    | st          d           d S t          d | D                       }t          d | D                       }t          d | D                       t	          |           z  }t          dt          t          j                    |z
             dt	          |            d| d|d	d
|d	d|d	           d S )NzNo candidates found.c              3   >   K   | ]}|                                 V  d S r   r>   r@   rW   s     r   rB   z(print_candidate_stats.<locals>.<genexpr>t   *      22!AGGII222222r   c              3   >   K   | ]}|                                 V  d S r   r>   ri   s     r   rB   z(print_candidate_stats.<locals>.<genexpr>u   rj   r   c              3   >   K   | ]}|                                 V  d S r   r>   ri   s     r   rB   z(print_candidate_stats.<locals>.<genexpr>v   rj   r   zElapsed z	 - Found  z - Visual Score min: z.2fz, avg: z, max: )printminr^   rE   r`   r,   r
   now)rR   rf   name	min_score	max_score	avg_scores         r   print_candidate_statsru   o   s.    $%%%22z22222I22z22222I22z22222S__DI	  dS%/00  d  d3z??  d  dT  d  dhq  d  d  d  H  d  d  d  U^  d  d  d  e  e  e  e  er   c                       e Zd ZdZd Zdedee         fdZde	de	de
e         fdZde	de
e         fd	Zd
e	defdZd
e	defdZd
e	dedefdZdS )CookieBannerManagerV2a(  
    Manages detection and interaction with cookie consent banners.
    
    This class focuses on identifying cookie banners and determining
    the appropriate action to take (accept/reject/customize).
    
    Usage:
    - find_cookie_banner_and_buttons() (returns JSON-serializable data)
    c           	         g dg dg dg dg dg dd}g d| _         g d	}|                                D ],}|D ]'}|D ]"}| j                             | d
| d           #(-g d| _        g d| _        g dg dg dg dg dg dd| _        g d| _        | j                                        D ]+}|D ]&}| j                            d| dd| dg           ',g dg dg dg dg dg dd| _        g d| _	        | j                                        D ]+}|D ]&}| j	                            d| dd| dg           ',g dg dg d g d!g d"g d#d| _
        g d$| _        | j
                                        D ]+}|D ]&}| j                            d| dd| dg           ',g d%| _        d&| _        d'S )(zLInitialize the cookie banner manager with enhanced selectors and heuristics.)cookiecookiesconsentzprivacy policyWe use cookiesThis website uses cookiestrackingdata protection)CookieCookiesEinwilligungDatenschutzzWir verwenden CookieszDiese Website verwendet Cookies)ry   rz   consentementu   politique de confidentialitézNous utilisons des cookies)ry   rz   consentimientou   política de privacidadzUtilizamos cookies)ry   rz   consensozinformativa sulla privacyzUtilizziamo i cookie)ry   rz   toestemmingprivacybeleidzWe gebruiken cookies)endefresitnl)@z#consentz#consent-bannerz#cookie-bannerz#cookie-consentz#cookie-noticez#cookieConsentz#cookieNoticez#onetrust-consent-sdkz#CybotCookiebotDialogz#usercentrics-rootz#didomi-hostz#truste-consent-trackz#quantcast-consent-bannerz.cookie-bannerz.cookie-consentz.cookie-noticez.cookies-bannerz.gdpr-consentz.js-cookie-bannerz.js-cookie-consentz
.cc-windowz
.cc-bannerz.message-cookiez.cookie-msgz
.cookieBarz.cookie-law-info-barz.osano-cm-windowz.cmp-banner_bannerz.fc-consent-rootz.qc-cmp2-containerz#cookie-policy-bannerz#privacy-noticez#gdpr-bannerz#consent-managerz.privacy-bannerz.gdpr-bannerz.consent-bannerz.cookie-overlayz.cookie-popupz.privacy-popupz.consent-popupz.cookie-modalz.privacy-modalz.consent-modalz.cookie-notificationz.privacy-notificationz[aria-label*='cookie' i]z[aria-label*='consent' i]z[aria-label*='privacy' i]z[data-testid*='cookie' i]z[data-testid*='consent' i]z[data-cy*='cookie' i]z[data-cy*='consent' i]z'[role='dialog'][aria-label*='cookie' i]z([role='dialog'][aria-label*='consent' i]z,[role='alertdialog'][aria-label*='cookie' i]z[id*='cookie' i]z[id*='consent' i]z[id*='privacy' i]z[id*='gdpr' i]z[class*='cookie' i]z[class*='consent' i]z[class*='gdpr' i]z[class*='privacy' i])divsectionasideheaderfooterz[class*='banner']z:has-text('z'))ry   rz   r{   privacygdprccpar~   	analyticsadvertisementpersonalization
functional	necessary	essentialperformance	marketingzsocial mediazthird partyaudiencer   zdata collectionz
data usagezuser experience)r|   r}   zWe and our partners use cookieszBy continuing to use this sitezBy using this site you agreez)Accept cookies to improve your experiencez"We use cookies to provide you withz$Cookies help us deliver our servicesz$This site uses cookies for analyticszWe use tracking technologieszBy clicking accept you consentzContinue without acceptingzManage your cookie preferencesz#You can change your cookie settingsz"For more information about cookieszRead our privacy policy)Acceptz
Accept AllzAccept All CookiesAllowz	Allow AllzAllow All CookiesAgreezI AgreezI AcceptOKzGot itContinueYesEnableProceedConsentzContinue to site)AkzeptierenzAlle akzeptierenEinverstanden	ZustimmenErlauben)AccepterzTout accepterz	J accepte	AutoriserzD accord)AceptarzAceptar todoAceptoPermitirz
De acuerdo)AccettazAccetta tuttoAccettoConsentiz	D accordoPermettiu   SìContinua)
AccepterenzAlles accepterenAkkoordToestaanEens)zbutton[id*='accept' i]zbutton[class*='accept' i]zbutton[id*='agree' i]zbutton[class*='agree' i]zbutton[id*='consent' i]zbutton[class*='consent' i]za[id*='accept' i]za[class*='accept' i]za[id*='agree' i]za[class*='agree' i]z.accept-cookiesz.accept-allz#accept-allz
.allow-allz
#allow-allz.cc-btn.cc-allowz.cc-btn.cc-dismissz.fc-button.fc-cta-consentz+.qc-cmp2-summary-buttons button:first-childz.osano-cm-accept-allz[data-testid='accept-all']z[data-cy='accept-all']z[data-qa='accept']zbutton[aria-label*='accept' i]zbutton[title*='accept' i]zbutton[value*='accept' i]z'[role='button'][aria-label*='accept' i]zbutton:has-text('za:has-text(')Rejectz
Reject AllzReject All CookiesDeclinezDecline AllNoz	No ThanksDenyRefuseDisagreeCancelClosezDo not consent)AblehnenzAlle ablehnenNein
VerweigernzNicht einverstanden)RefuserzTout refuserNonRejeterzPas d accord)RechazarzRechazar todor   DenegarzEn desacuerdo)RifiutazRifiuta tuttor   NegazNon accetto)WeigerenzAlles weigerenNeeAfwijzenOneens)zbutton[id*='reject' i]zbutton[class*='reject' i]zbutton[id*='decline' i]zbutton[class*='decline' i]za[id*='reject' i]za[class*='reject' i]z.reject-allz#reject-allz.decline-allz#decline-allz.cc-btn.cc-denyz .fc-button.fc-cta-do-not-consentz.osano-cm-deny-allz[data-testid='reject-all']z[data-cy='reject-all']z[data-qa='reject']zbutton[aria-label*='reject' i]zbutton[title*='reject' i])	CustomizezCustomize CookieszCookie PreferencesPreferencesSettingsManagezManage CookieszCookie SettingszMore OptionsAdvancedDetailsz
Learn MoreChooseSelect	ConfigurezManage options)AnpassenEinstellungen	Verwaltenu   PräferenzenzMehr OptionenKonfigurieren)Personnaliseru   Préférencesu   Paramètresu   GérerzPlus d options
Configurer)PersonalizarPreferenciasu   Configuración	Gestionaru   Más opciones
Configurar)Personalizza
PreferenzeImpostazioniGestisciu   Più opzioniConfigurare)	Aanpassen
VoorkeurenInstellingenBeherenzMeer optiesConfigureren)zbutton[id*='customize' i]zbutton[class*='customize' i]zbutton[id*='preferences' i]zbutton[class*='preferences' i]zbutton[id*='settings' i]zbutton[class*='settings' i]zbutton[id*='manage' i]zbutton[class*='manage' i]z.customize-cookiesz.cookie-settingsz.cc-btn.cc-customizez .fc-button.fc-cta-manage-optionsz.osano-cm-infoz[data-testid='customize']z[data-cy='customize']z[data-qa='preferences']z!button[aria-label*='customize' i]z#button[aria-label*='preferences' i]zbutton[title*='customize' i]zbutton[title*='preferences' i])
zdiv:has-text('We use cookies')z)div:has-text('This website uses cookies')z/div:has-text('We and our partners use cookies')z.div:has-text('By continuing to use this site')zdiv:has-text('Accept cookies')z!section:has-text('cookie policy')z aside:has-text('privacy policy')z*:has-text('GDPR')z*:has-text('data protection')z#*:has-text('tracking technologies')a
  
            element => {
                if (element.id) return `#${element.id}`;
                const parts = [];
                while (element && element.nodeType === Node.ELEMENT_NODE) {
                    let part = element.tagName.toLowerCase();
                    const siblings = Array.from(element.parentNode?.children || [])
                        .filter(n => n.tagName === element.tagName);
                    if (siblings.length > 1) {
                        const index = siblings.indexOf(element) + 1;
                        part += `:nth-of-type(${index})`;
                    }
                    parts.unshift(part);
                    element = element.parentElement;
                }
                return parts.join(" > ");
            }
      N)banner_selectorsvaluesr6   cookie_keywordscookie_phrasesaccept_textsaccept_button_selectorsextendreject_textsreject_button_selectorscustomize_textscustomize_button_selectorssemantic_selectorscss_selector_js_function)r   banner_content_keywordsbanner_elementslang_keywordskeywordr   
lang_textsr   s           r   r   zCookieBannerManagerV2.__init__   s   
O O O6 6 61 1 1) ) )+ + +___#
 #
&&!
 &!
 &!
X _^^4;;== 	U 	UM( U U. U UG)00G1S1S1S1S1STTTTUU 
  
  

 
 
,Z Z Z `__UUUQQQ& & &SSS

 

 (
 (
 (
$, +2244 	 	J"  ,330000+4+++5    % % % ]\\OOOQQQKKKMMM	
 	

(
 
(
 
(
$ +2244 	 	J"  ,330000+4+++5    % % % onnmmmpppkkkggg
 
 
 
+
 +
 +
' .5577 	 	J"  /660000+4+++8    # # #)
%%%r   pager*   c           	      
  K   t          j                    }d}d}g }| j        D ]}}|                    |           d{V }|D ]]}|                                 d{V s|                     ||           d{V }	|                    t          |||	                     ^~|sdS t          ||           t          	                    |d          }
g }|
D ]}|j
                                         d{V s"|                     |j
                   d{V }|                    t          |                     || d         }|r:|                    d d           |d|         D ]}|                    |           t          |          d	k    r$t          |          |k    rt!          d
            nt          |
|d           t          	                    t#          t%          d |
                    d          }|D ]}t'          |j                  D ]q}|                     |j
        |j
                   d{V }|rG|                    d d           t'          |d|                   D ]}|                    |           rt          ||d           t          	                    t#          t%          d |                    d          }|D ]r}t/          |j
                                         d{V |                     |j
                   d{V |                     |j
                   d{V           |_        st          ||d           t          	                    t#          t%          d |                    d          }t9          |          D ]i\  }}t!          d| d|            t!          d|j        d         j
                            | j                   d{V             t!          d           j|rt?          j         |          nd}t?          j!        |j                   t?          j!        |j"                   |st!          d           dS |j
                            | j                   d{V |j        d         j
                            | j                   d{V |j"        d         j
                            | j                   d{V d|j#        |j        j        |j        j$        |j        d         j%        |j"        d         j%        |%                                ddS )a+  
        loops over selectors to find candidate cookie banners and relative buttons.
        
        uses banner_selectors to spot every possible banner.
        
        then sorts candidates by smallest area + z index + position (in the middle) and takes the 30percentile.
        
        then every candidate is converted to html and parsed w bs4

        other euristics are applied to score the candidates:

        text requading cookies, privacy, consent, gdpr, ccpa

        presence of accept/reject/customize buttons inside the banner
              N333333?)rS   c                     | j         S r   r>   xs    r   rX   zFCookieBannerManagerV2.find_cookie_banner_and_buttons.<locals>.<lambda>  s    !' r   TrY   r\   zB  -  no accept buttons found in last 10 candidates, breaking earlyztop visual candidatesc                 2    t          | j                  dk    S Nr   )r`   r2   rV   s    r   rX   zFCookieBannerManagerV2.find_cookie_banner_and_buttons.<locals>.<lambda>  s    #a&6"7"7!"; r   c                     | j         S r   r>   r  s    r   rX   zFCookieBannerManagerV2.find_cookie_banner_and_buttons.<locals>.<lambda>  s     r   zcandidates with accept buttonsc                 b    t          | j                  dk    ot          | j                  dk    S r	  )r`   r2   r3   rV   s    r   rX   zFCookieBannerManagerV2.find_cookie_banner_and_buttons.<locals>.<lambda>  s,    #a&6"7"7"9"Uc!BR>S>STU>U r   )r   r&   r'   z)candidates with accept and reject buttonsc                 J    | j         o| j                                         dk    S r	  )r&   r   rV   s    r   rX   zFCookieBannerManagerV2.find_cookie_banner_and_buttons.<locals>.<lambda>  s     !,"K1<3E3E3G3G!3K r         ?rm   z - z     - r    z>No suitable cookie banner candidate found after full analysis.)banner_selectoraccept_button_selectorreject_button_selector)r0   r&   r)   accept_button_scorereject_button_scoretotal_score)	selectorsscores)&r
   rp   r   query_selector_all
is_visible_visual_heuristics_scoringr6   r/   ru   rc   r    _lookup_accept_buttons_in_bannerr`   r]   r8   rE   rn   ra   filtersetr2    _lookup_reject_buttons_in_bannerr;   r%   
inner_text_text_heuristics_html_heuristicsr&   	enumerateevaluater   randomchoiceshuffler3   r0   r)   r   )r   r   process_beginCOUNT_LAST_BUTTON_LENTOP_BUTTONS_COUNTbanner_candidatesr   elementsr   r0   top_candidatescount_of_last_accept_buttons	candidater2   rA   candidate_with_accept_buttonsreject_buttons_candidatesrbtn(candidate_with_accept_and_reject_buttons.great_candidate_with_accept_and_reject_buttonsirW   best_candidates                          r   find_cookie_banner_and_buttonsz4CookieBannerManagerV2.find_cookie_banner_and_buttons  s     " ! ! .0- 	 	H!44X>>>>>>>>H# 	 	$//11111111 %)%D%DWd%S%SSSSSSS!((<    	 ! 	4/???";; < 
 

 (*$ ( 	 	I"*5577777777 #'#H#HIZ#[#[[[[[[[N )//N0C0CDDD+GI^H^H_H_+`(  5##(9(94#HHH)*<+<*<= 5 5C//4444/00A55#>Z:[:[_t:t:tZ[[[nm=TUUU(1(J(J;;^LLMM )K )
 )
% 7 		: 		:I9344 : :262W2W%s{3 3 - - - - - -) - :-227H7HRV2WWW #$=>P?P>P$Q R R : :!33D9999: 	;]Llmmm3<3U3UUUWtuuvv 4V 4
 4
0
 B 	 	I#-&.99;;;;;;;;#'#8#89J#K#KKKKKKK#'#8#89J#K#KKKKKKK$ $ $I   	F  XC  	D  	D  	D9B9[9[KKMuvvww :\ :
 :
6 LMM 	 	DAq-a--A--   g!"21"5"="F"FtGd"e"eeeeeeegghhh"IIII [I  S'UVVV  OS~4555~4555 	RSSS4 *8)?)H)HIf)g)g#g#g#g#g#g#g0>0Ma0P0X0a0abfb  1A  1A  +A  +A  +A  +A  +A  +A0>0Ma0P0X0a0abfb  1A  1A  +A  +A  +A  +A  +A  +A  !/ ;,7B,7B'5'DQ'G'M'5'DQ'G'M-3355 
 
 	
r   banner_elementaccept_button_elementc           
      8  K   g }t          | j                  D ]\  }}|                    |           d{V }|D ]Z}|                                 d{V }g }	| j                                        D ].}
|
D ])}|	t          |                                |d          z  }	*/t          |	          }d}|r	 |	                                 d{V }|	                                 d{V }|rD|rB|d         |d         z
  dz  |d         |d         z
  dz  z   dz  }t          dd	|d
z  z
            }n# t          $ r Y nw xY w||z   |t          | j                  z  z   }|                    t          |||                                |                     \|S )u  
        works similarly to _lookup_accept_buttons_in_banner

        but it will also look for button close to the accept_button_element

        returns a list of dictionaries containing button element, selector, text, and score.
        score is based on text similarity to known reject phrases, proximity to accept button, and selector priority.

            Practical overall score (what you will normally see in real pages):
            Common: 0 — 6
            Good (clear reject label + near the accept button): ≈ 6 — 12
            Rare but possible: up to ~13 (e.g., exact reject-match + proximity_score ≈10 + selector fraction)        
        
        Nr\   
max_l_distr   r     yr  
   2   )r!  r   r  r  r   r   r   stripr`   bounding_boxr^   	Exceptionr6   r   )r   r6  r7  r3   r3  r   r*  r   text_contentdiststexts_in_langr   
dist_scoreproximity_score
accept_boxelement_boxdistancefinal_scores                     r   r  z6CookieBannerManagerV2._lookup_reject_buttons_in_banner  sI       02$T%ABB 	 	KAx+>>xHHHHHHHHH#  %,%7%7%9%9999999%)%6%=%=%?%? ] ]M - ] ]!2<3E3E3G3GZ[!\!\!\\] !ZZ
 #$( +@+M+M+O+O%O%O%O%O%O%O
,3,@,@,B,B&B&B&B&B&B&B% I+ I)3C;s;K)KPQ(QU_`cUdgrsvgwUw|}T}(}  CF  (FH.1!R(R-5G.H.HO$    )?:a#dFbBcBc>cd%% <+=+=+?+?    -6 s   7A:D22
D?>D?c           
        K   g }t          | j                  D ]H\  }}|                    |           d{V }|D ]#}|                                 d{V }g }| j                                        D ].}	|	D ])}
|t          |                                |
d          z  }*/t          |          }g }| j	                                        D ].}	|	D ])}
|t          |                                |
d          z  }*/|t          |          z  }||t          | j                  z  z   }|
                    t          |||                                |                     %J|S )u>  
        Looks for accept buttons within a given banner element using predefined selectors.
        
        returns a list of dictionaries containing button element, selector, text, and score.
        score is based on text similarity to known accept phrases and selector priority.

        score range
        Finite-theoretical 
            about [-38.0, +45.99] (or more precisely ≈ [-37.01, 45.99] if you account for max selector index).
        Practical-realistic: 
            roughly [-3, +7], with most useful values in 0..3 for typical accept buttons.

        Nr\   r9  )r!  r   r  r  r   r   r   r?  r`   r   r6   r   )r   r6  r2   r3  r   r*  r   rB  rC  rD  r   rE  	neg_distsrJ  s                 r   r  z6CookieBannerManagerV2._lookup_accept_buttons_in_banner?  s      02$T%ABB 	 	KAx+>>xHHHHHHHHH#  %,%7%7%9%9999999%)%6%=%=%?%? ] ]M - ] ]!2<3E3E3G3GZ[!\!\!\\] !ZZ
	%)%6%=%=%?%? a aM - a a!%6|7I7I7K7KT^_%`%`%``		a c)nn,
(AD4P0Q0Q,QR%%lX|'9'9';';[' '    %* r   r   c                   K   |                                  d {V }d}| j        D ]+}t          ||ddd          }|t          |          dz  z  },| j        D ]+}t          ||ddd          }|t          |          dz  z  },t          |pd          }|t          d|          z  dz  }|dk    r|d	k     r|d
z  }n#|dk    r|dk     r|dz  }n|dk     r|dk    r|dz  }||d	z   z  dz  S )N        r\   )max_substitutionsmax_insertionsmax_deletions      ?r;         @r    r  rQ   i  r  r  i  r=  gffffff?)r  r   r   r`   r   r^   )	r   r   element_textr   r   matchesphrasetext_lendensity_per_1000s	            r   r  z&CookieBannerManagerV2._text_heuristicsh  sU     $//11111111+ 	( 	(G'QRcdtuvvvGS\\C''EE) 	( 	(F'PQbcstuuuGS\\C''EE |)r** 3q(#3#33d: d??/!33SLEE^^ 01 4 4SLEE^^ 02 5 5TME	"Q&&r   c                 ,  K   |                                  d{V }t          |d          }g }t          |                    d                    D ]v\  }}|j        }d                    d |j                                        D                       }|                    | d| 	                                           |dk    r nwg }	|D ]}t          d          }
d}d	}t          | j                  D ]R\  }}t          j        j                            ||          }||
k     r"|}
|t          | j                  |z
  |d
z   z  z  }S|	                    |           |	r4t!          |	          t          |	          z  t          | j                  z  S dS )a  
        extracts only text that defindes a tag
        eg
            <div>text</div> -> "div"
            <span class="cookie">text</span> -> "span class=cookie"

        then makes another check on the tags found (cause i want to boost strong matches that where not caught by exact selectors)
        against self.banner_selectors using levenshtein distance

        returns a float (between 0 bad and 1 good) score indicating likelihood of being a cookie banner.
        Nzhtml.parserTrm   c              3   ,   K   | ]\  }}| d | dV  dS )z=""Nr#   )r@   kvs      r   rB   z9CookieBannerManagerV2._html_heuristics.<locals>.<genexpr>  s2      "N"NDAqa<<1<<<"N"N"N"N"N"Nr   r  infr   r\   )
inner_htmlr	   r!  find_allrq   joinattrsitemsr6   r?  r-   r   	rapidfuzzrI  Levenshteinr`   rE   )r   r   element_htmlelement_soupelement_tagsr3  tagtag_nameattr_stringordered_scoresbest_distancebest_match_indexaggregate_scorerefrI  s                  r   r   z&CookieBannerManagerV2._html_heuristics  s      %//11111111$\=AA 5 5d ; ;<< 	 	FAsxH(("N"NCIOO<M<M"N"N"NNNK8 ; ;k ; ; A A C CDDD1uu   		3 		3C!%LLM!O#D$9:: Y Y3$-9BB3LLm++$,M#D,A(B(BQ(F8VW<'XXO!!/2222 	Z~&&^)<)<<s4CX?Y?YYY	Z 	Zr   c                   K   	 d}|                                  d{V }|sdS |                                 d{V }|r&|d         dk    s|d         |d         dz
  k    r|dz  }|d	         |d	         z  }|d
k    r|dz  }n#|dk    r|dz  }n|dk    r|dz  }n|dk     r|dz  }|d         |d         z  }|dk    r|dz  }n!d|cxk    rdk    r	n n|dz  }n|dk     r|dz  }|d	         |d         z  }||d	         |d         z  z  }	d|	cxk    rdk    r	n n|dz  }n|	dk    r|dz  }|d         |d	         dz  z   }
t          |
|d	         dz  z
            |d	         dz  k    r|dz  }n3t          |
|d	         dz  z
            |d	         dz  k    r|dz  }n|dz  }	 |                    d           d{V }|r9t	          |                                          rt          |          dk    r|dz  }n# t          $ r Y nw xY w|d	         dk     s|d         dk     r|dz  }	 |                    d           d{V }|                    d           d{V pd|                    d            d{V pdg d!}|d"v p5t          fd#|D                       pt          fd$|D                       }|r|d%z  }n# t          $ r Y nw xY wt          |d&          S # t          $ r Y d&S w xY w)'u  
        Score a candidate banner element using visual heuristics.
        Inspired by _score_banner_candidate and _visual_banner_detection from v1.
        Returns a float score indicating likelihood of being a cookie banner.

        returns: value between 1.0 up to ≈8.7
            0.1: offscreen/hidden elements or unreachable nodes.
            1.0 2.5: weak matches or small/embedded banners, or elements penalized (container flag, too small, not centered).
            2.5 5.5: likely banners (bottom/top bars, moderate size, somewhat centered, have decent area).
            5.5 8.7: very strong matches (overlay modal, centered, correct size/area, high z-index, and placed at top/bottom).
        rN  Ng?r<  d   height   rS  widthgQ?g?g      ?g?rR  r  g333333?g      @g?g?g{Gz?rQ   r  r;  g333333?r  z(el => window.getComputedStyle(el).zIndexrT  r>  zel => el.tagName.toLowerCase()idr  class)bodymaincontentwrapper	containerr   approot)rz  r{  htmlc              3   D   K   | ]}|                                 v V  d S r   lower)r@   ind
element_ids     r   rB   zCCookieBannerManagerV2._visual_heuristics_scoring.<locals>.<genexpr>  s4      RRcz//111RRRRRRr   c              3   D   K   | ]}|                                 v V  d S r   r  )r@   r  element_classs     r   rB   zCCookieBannerManagerV2._visual_heuristics_scoring.<locals>.<genexpr>	  s4      UU}22444UUUUUUr   g      @r\   )r@  viewport_sizeabsr"  r,   isdigitr_   rA  get_attributeanyr^   )r   r   r   r   r@  viewportwidth_ratioheight_ratioarea
area_ratiocenter_xz_indexrl  container_indicatorsis_containerr  r  s                  @@r   r  z0CookieBannerManagerV2._visual_heuristics_scoring  sy     M	E ")!5!5!7!7777777L s "//11111111H !$++|C/@HXDVY\D\/\/\SLE&w/(72CCKd""##$$s""'1HX4FFLc!!+++++++++$$  (<+AAD'!2Xh5G!GHJz((((S(((((c!! $C(<+@1+DDH8hw/!33448IC8OOOX 1A 5566(7:Kc:QQQ ' 0 01[ \ \\\\\\\ !s7||3355 !#g,,:M:MSLE    G$s**l8.Dr.I.I!(!1!12R!S!SSSSSSS#*#8#8#>#>>>>>>>D"
&-&;&;G&D&D D D D D D D J'q'q'q$ 88 VRRRR=QRRRRRVUUUU@TUUUUU 
   !SLE    ua==  	 	 	11	sa   K" E;K" "AG9 8K" 9
HK" H K" 'BK K" 
KK" KK" "
K0/K0N)r   r    r!   r"   r   r   r   r   r5  r   r   r   r  r  r-   r  r   r  r#   r   r   rw   rw   z   sE        {
 {
 {
zE
 E
(4. E
 E
 E
 E
R.] .kx .  ~B  CO  ~P . . . .h&] &W[\hWi & & & &R'm ' ' ' ' 'D*Zm *Z *Z *Z *Z *Z^Y YD YUZ Y Y Y Y Y Yr   rw   )re   )r"   asynciotypingr   r   r   r   patchright.async_apir   r   bs4r	   r
   rf  fuzzysearchr   r#  r   r%   r/   ru   rw   r#   r   r   <module>r     s     0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 5 5 5                 ) ) ) ) ) ) 
 
 
 
 
 
 
 
(1 1 1 1 1 1 1 1<* <* <* <* <* <* <* <*~e ed9o eh e e e eX
 X
 X
 X
 X
 X
 X
 X
 X
 X
r   