vegRdZdZddlZddlZddlZddlmZddlmZm Z m Z ddl m Z m Z ddl mZddlmZmZmZmZmZdd lmZddlZdd lmZdd lmZdd lmZdd lm Z m!Z!m"Z"ddl#m$Z$m%Z%m&Z&ddl'm(Z(ddl)m*Z*ddl+m,Z,ddl-m.Z.m/Z/m0Z0dZ1 ddl2m3Z4n#e5$rZ6e6Z1dZ4YdZ6[6ndZ6[6wwxYweGddZ7eGddZ8eGddZ9GddeZ:de;de:fdZ<GddeZ=e>d kr7e0ej?d!ej@"e=ZAeAdSdS)#z' This module contains Autotracer class z ssa.modulesN) defaultdict) dataclassfieldasdict)datetime timedelta) fnmatchcase)IteratorOptional NamedTupleListIterable)urlparse)setup_database)SSAError)Common)iter_domains_dataiter_urls_dataget_url_durations)statusdisabled_usersmisconfiguration_checks) load_tunables)autotracing_tunables_schema)autotracing_stats_file)is_xray_version_supportedis_kernel_version_supported sentry_initcZeZdZUdZeed<eed<eed<eed<eed<eed<eed<d S) URLzURL data containeruri avg_duration max_durationtotal_slow_reqs total_reqs belongs_tobelongs_to_userN)__name__ __module__ __qualname____doc__str__annotations__intM/opt/cloudlinux/venv/lib64/python3.11/site-packages/ssa/modules/autotracer.pyr r 3sc HHHOOOOOOr0r ceZdZUdZdZeed<dZeed<dZeed<dZ eed<dZ eed<dZ eed<dZ eed <dZ eed <dZeed <dZeed <dZeed <dS) RulesRejectszRejects counters containerrnon_wp throttleddensityslowness max_slownessdisablednginx frequency server_limit domain_limit no_domainN)r(r)r*r+r4r.r-r5r6r7r8r9r:r;r<r=r>r/r0r1r3r3?s$$FCOOOIsGSHcL#HcE3NNNIsL#L#Isr0r3c\eZdZUdZeed<dZeed<dZeed<e e Z e ed<dS) StatszAutoTracer statistics container rules_versionrurls_processed urls_selected)default_factoryrejectsN) r(r)r*r+r,r-rBr.rCrr3rEr/r0r1r@r@Osa))NCM3!E,???G\?????r0r@c(eZdZUdZeed<eed<dS)URLSz" Representation of an URL domain_nameuri_pathN)r(r)r*r+r,r-r/r0r1rGrGXs1MMMMMr0rGurlreturnct|}|jr d|jnd}|jr |j|nd}|jdd}|dd}t ||S)z Split URL into domain_name and uripath including query string :param url: URL of format protocol://domain/path;parameters?query#fragment :return: namedtuple URL(domain_name, uripath) ?/zwww.:r)rquerypathnetlocreplacesplitrG)rJ fragmentsqsr!_no_www_netloc_no_port_netlocs r1 url_splitrZ`s  I"+/ 9 Y_   rB%.^ Says if nginx is enabled for a specific user NF) skip_nginxrpNginxUserCache is_enabled)rjrs r1nginx_is_enabledzAutoTracer.nginx_is_enableds3 ? Au*955@ @ur0ctd|jD}|jd||j|z S)z>Recalculate limit per server taking into account running taskscBg|]}|ddk|ddk|S)rrunninguserz *autotracing*r/).0tasks r1 z.sM===d!(^y88T!'>),;><><"><><>>  B' ) ) )$}44r0 full_listc|.tj}|jd|t jt |jz fd|DS)zQ Excludes tasks older than N days from the general list of tasks NzTask list loaded %s)dayscg|]S}|d|dndtkQ|TS) createtimeNr)getr. timestamp)rr n_days_agos r1rz1AutoTracer.excludes_old_tasks..sixxx.2hh|.D.D.PDHH\***VW[^_i_s_s_u_u[v[vvvvvvr0)rprgrcrvrnowrmin_retracing_interval)rjrrs @r1excludes_old_taskszAutoTracer.excludes_old_taskss   +--I K  3Y ? ? ?\^^i,'.'.'.. xxxxxxx xr0 current_urlc\t|}|jD]}t|d}|j|jko|j|jk}t |j|jot |j|j}|s|r%|jd||ddSdS)z Excludes url from the list if it completely matches the current url or if domain names match and "*" follows the domain name in the list rJz2Skipped: URL %s was traced recently. Matched by %sTF)rZrgrHrIr rcrv)rjrc task_datat direct_matchwildcard_matchs r1exclude_thesame_urlszAutoTracer.exclude_thesame_urlss k " "  I)E*++A=AM9VajAJ>VL( q}..J2=aj>?j3J3J  ~  !!H5!1333tt   ur0url_total_reqsdomain_total_reqsc|jrMtjtj||}|jd|||jkSdS)z-Check that URL density passes given thresholdzCalculated density %sT)r6npamincorrcoefrcrvdensity_threshold)rjrr url_densitys r1pass_by_densityzAutoTracer.pass_by_densitys] < 8' N,=>>@@K K  5{ C C C!77 7tr0 url_durationscd|jz }tj||}|jd||||jkS)z The measure of "slowness" for URL is: at least N% of its requests take more than X seconds. N% -- self.slow_duration_percentage X -- self.slow_duration_threshold dz.Calculated %sth percentile %s for min duration)slow_duration_percentager percentilercrvslow_duration_threshold)rjrreversed_percentilereversed_percentile_values r1pass_by_slowness_percentilez&AutoTracer.pass_by_slowness_percentiles^"D$AA$&M-2E%G%G! J-3 5 5 5)D,HHHr0ctj||j}|jd|j|||jkS)a The opposite to pass_by_slowness_percentile method. The measure of "much slowness" for URL is: at least N% of its requests must take less than X seconds. N% -- self.max_slow_duration_percentage X -- self.max_slow_duration_threshold z.Calculated %sth percentile %s for max duration)rrmax_slow_duration_percentagercrvmax_slow_duration_threshold)rjrpercentile_values r1pass_by_max_slowness_percentilez*AutoTracer.pass_by_max_slowness_percentilesV=)-)JLL J;* , , , 4#CCCr0url_throttled_reqsc|dSt|t|z dz}|jd|||jkS)zY Check that percent of throttled requests per URL passes given threshold NFrzCalculated throttled percent %s)sumrcrvallowed_throttling_percentage)rjrrthrottled_percents r1pass_by_allowed_throttlingz%AutoTracer.pass_by_allowed_throttlingsX  %5 !344S5H5HH#M ;=NOOO D$FFFr0 wp_statusc2|jrt|SdS)zx Check that URLs of a particular domain should be analyzed. For now we skip non-wordpress sites T)only_wpbool)rjrs r1pass_by_enginezAutoTracer.pass_by_engines < # ?? "tr0c #Kt|jD]5}d}||jsQ|jd|j}|jjxj |z c_ |jxj |z c_ ptt|j|j }t|j|j t|D]b\}}||jvr|jd||jxj dz c_ ||d|ds5|jd|jjxjdz c_||d|js5|jd|jjxjdz c_||}||s6|jd |jjxjdz c_M||s6|jd |jjxjdz c_|Y||j }|6|jd |jjxjdz c_|j}t?|tAtCj"|tG|d tG|dtI||j | Vd7dS)a  Select all URLs suitable for auto tracing by very basic rules: - WP site - suitable throttling - suitable density - measure of "slow" URL ORDER OF RULES MUST NOT BE CHANGED: IT AFFECTS STATISTICS COUNTERS Nz Skipped by engine: non-wordpresszProcessing URL %srmrrzSkipped by throttled percentzSkipped by densityzSkipped by slowness percentilez"Skipped by max slowness percentilez"Skipped by unavailable domain info url_slow_reqs)r"r$r%r#r&r')%rriris_a_wordpress_domainrcrv urls_numberrhrEr4rBdictrrHrrfkeysnon_url_fieldsrrr5rrr6rr7rr8rr>rr r.rmeanrmax) rj domain_data domain_owner skipped_countdomain_url_durationsrJdata durationsrs r1urls_computationally_filteredz(AutoTracer.urls_computationally_filtered$s-T[99K 8K 8KL&&{'HII  !!"DEEE!, 7  "))]:)) ))]:))#'(9 [4)6)6$7$7 ,DK,7,CTJ^JcJcJeJeEfEfhh: 8: 8 T$--- !!"5s;;; ))Q.))66!566=M8NPPK%%&DEEEJ&00A500++D1A,B,7,IKKK%%&:;;;J&..!3..05 77 BBK%%&FGGGJ&//14//;;IFFK%%&JKKKJ&33q833 '"&"6"6{7N"O"OK"* ))*NOOO *44944 '2'7 #'*279+=+='>'>*-d?.C*D*D%(.>)?%@%@'*9~~%0%<*6 8888888i: 8#K 8K 8r0stats_collectedc#K||D]}|j|jvr;|jd|j|jjxjdz c_K| |jr;|jd|j|jjxj dz c_ |VdS)z From selected by computed thresholds URLs take those for which: - autotracing enabled - nginx disabled ORDER OF RULES MUST NOT BE CHANGED: IT AFFECTS STATISTICS COUNTERS z'Skipped: autotracing is disabled for %srmz Skipped: nginx is enabled for %sN) fill_in_disabled_usersrr'rrcrvrhrEr9rr:)rjrrJs r1rCzAutoTracer.urls_selectedzs ##%%%5577  C"d&999 !!"K"%"5777 "++q0++$$S%899  !!"D"%"5777 "((A-((IIII  r0c#Kd}tt}|}|jd|||_|}|D]}||j r|j j xj dz c_ 7||krv||j |jkr|dz }||j xxdz cc<|Vr|jd|j |j|j j xjdz c_|jd|j ||j j xjdz c_dS)a= Schedule autotracing by sorted list taking into account the limits: - no same task for 10 days - limit per server - limit per domain ORDER OF RULES MUST NOT BE CHANGED: IT AFFECTS STATISTICS COUNTERS Return resulting list of URLs scheduled for auto tracing rzSorted scheduled list %srmz#Skipped URL %s by domain limit (%s)z#Skipped URL %s by server limit (%s)N)rr. urls_sortedrcrvrrgrrr!rhrEr;r&per_domain_limitr=r<)rjgeneral_tasks_countertasks_counter_per_domain sorted_urlsper_server_smart_limitrJs r1rxzAutoTracer.urls_scheduleds!"#.s#3#3 &&((  4kBBB1133!%!C!C!E!E 5 5C((11  ",,1,,$'===+CN;d>SSS)Q.),S^<<<A<<<IIIIK%%&K&)g&*&;===J&33q8333 !!"G"%'"8::: "//14///1 5 5r0ctt|dd}t|ddS)zd Sort URLs by total number of requests first and by average duration second c|jSN)r"us r1z(AutoTracer.urls_sorted..s1>r0T)keyreversec|jSr)r%rs r1rz(AutoTracer.urls_sorted..sr0)sortedrfrC)rj first_series r1rzAutoTracer.urls_sorteds[ T$"4"4"6"677!9!94III k'='="$$$ $r0c,t|_dS)z6 Fill internal list of disabled users N)r)rjs r1rz!AutoTracer.fill_in_disabled_userss-..r0cD|j ttd5}tjt |j|ddddS#1swxYwYdS#t $r3}|jdt|Yd}~dSd}~wwxYwdS)zI Save collected statistics for current iteration to file Nwz*Unable to save iteration stats to file: %s) rhopenrjsondumprOSErrorrcwarningr,)rj stats_filer~s r1r{zAutoTracer.save_iteration_statss : ! J0#66>*IfTZ00*===>>>>>>>>>>>>>>>>>> J J J ##@#a&&JJJJJJJJJ J " !s:A (A A AA AA B*(BBc x tt5}tj|}t |d|d|dt di|d}dddn #1swxYwYn6#t tjtf$rt |j }YnwxYwt|S)z@ Load statistics for latest iteration from file rArBrCrENr/) rrrloadr@r3rJSONDecodeErrorKeyErrorrAr)rjr_data stat_datas r1load_iteration_statszAutoTracer.load_iteration_statss 2,-- D *--!%"8"'(8"9"'"8"."B"By1A"B"BDD  D D D D D D D D D D D D D D D -x8 2 2 2d011III 2i   s5A7A A+ A7+A//A72A/3A770B*)B*c |}|ttdt t |S)rNr)rdisabled_users_quantity)rupdaterrrr)rj stats_loadeds r1 get_statszAutoTracer.get_statssb0022 D88A;$'(8(8$9$9      r0r)rKN)&r(r)r*r+r`r staticmethodrrr,r objectrrrr.rrfrrrrrrrrr r rrCrxr rrr{rr __classcell__)rks@r1r\r\ns)====== $ $ $ $ $D:t:::\:1S1Xf-=111\1#$5s5555 x xHTN xd x x x x*d+/48 I# I4 I I I ID7?}DIMDDDD GXd^ G37 G<@ G G G G4T8x}T8T8T8T8n9='/~AI#8*5 *5*5*5*5X$T#Y$$$$//// J J J J !d ! ! ! !4r0r\__main__zauto_tracer_standalone.log)filenamelevel)Br+rerrarq collectionsr dataclassesrrrrrfnmatchr typingr r r r r urllib.parsernumpyrssa.dbrssa.internal.exceptionsrssa.modules.commonrssa.modules.storagerrrssa.autotracingrrrssa.configurationrssa.configuration.schemesrssa.internal.constantsrssa.internal.utilsrrrrtxray.shared_libraryshared_libraryrp ImportErrorr~r r3r@rGr,rZr\r( basicConfigDEBUGrr/r0r1rs  ######0000000000((((((((AAAAAAAAAAAAAA!!!!!!!!!!!!,,,,,,%%%%%% LKKKKKKKKK++++++AAAAAA999999 *******HHHHHH              @@@@@@@ @: &3 &4 & & & &GGGGGGGGT  zKMMMG!=%m---- AACCCCC s BB&B!!B&