diff --git a/docs/guide.fa.md b/docs/guide.fa.md index d0247453..84f7dc8a 100644 --- a/docs/guide.fa.md +++ b/docs/guide.fa.md @@ -225,7 +225,9 @@ HTTP / HTTPS مثل قبل از Apps Script می‌رود (تغییری نمی | ۶ | ۱۸۰ | توصیه‌شده برای استفادهٔ سنگین | | ۱۲ | ۳۶۰ | چند حساب — حداکثر توان | -بیشتر Deployment = همزمانی بیشتر = تأخیر کمتر هر سشن. هر بَچ بین IDها چرخش می‌کند و بار به‌طور یکنواخت توزیع می‌شود، احتمال رسیدن به سقف سهمیهٔ یک Deployment کاهش می‌یابد. +بیشتر Deployment = همزمانی بیشتر = تأخیر کمتر هر سشن. انتخاب هر بَچ از بین IDهای تنظیم‌شده با یک ledger محلی rolling 24-hour انجام می‌شود؛ بار پخش می‌شود و کلاینت از Deploymentهایی که همین دستگاه نزدیک سقف request سهمیهٔ رایگان برده دوری می‌کند. + +پنل **Script health** در UI دسکتاپ همین وضعیت محلی را فقط به‌صورت read-only نشان می‌دهد: Deployment ID ماسک‌شده، تعداد callهای مشاهده‌شده در پنجرهٔ rolling 24-hour، اینکه threshold محلی free-tier اشباع شده یا نه، cooldown باقی‌مانده، دلیل/کلاس خطایی که آن cooldown را ساخته، و تعداد timeout strikeهای فعلی. این فقط telemetry سمت کلاینت است؛ اگر دستگاه‌های دیگر هم از همان deployment استفاده کنند، Google ممکن است callهای بیشتری شمرده باشد. **محافظ‌های منابع:** - **حداکثر ۵۰ op** در هر بَچ — اگر سشن‌های فعال بیشتر باشند، مالتی‌پلکسر چند بَچ می‌فرستد @@ -350,7 +352,7 @@ sni_hosts = ["www.google.com", "drive.google.com", "docs.google.com"] | Connection pool | TTL ۴۵ ثانیه، حداکثر ۲۰ idle | | رمزگشایی gzip | اتوماتیک | | چند اسکریپت | چرخش round-robin | -| Blacklist خودکار | روی خطای 429 / quota، با cooldown ۱۰ دقیقه | +| قرنطینهٔ خودکار اسکریپت | خطاهای quota/account برای ۲۴ ساعت؛ خطاهای گذرای relay با cooldown کوتاه | | کش پاسخ | ۵۰ مگابایت، FIFO + TTL، آگاه از `Cache-Control: max-age`، heuristic برای static asset | | Coalescing | GETهای یکسان همزمان یک fetch upstream را به اشتراک می‌گذارند | | تونل بازنویسی SNI | مستقیم به لبهٔ گوگل (بدون رله) برای `google.com`، `youtube.com`، `youtu.be`، `youtube-nocookie.com`، `fonts.googleapis.com` — دامنه‌های اضافی از فیلد `hosts` | diff --git a/docs/guide.md b/docs/guide.md index 679a35d0..d1d28fca 100644 --- a/docs/guide.md +++ b/docs/guide.md @@ -225,7 +225,9 @@ max_concurrent = 30 × number_of_deployment_ids | 6 | 180 | Recommended for heavy use | | 12 | 360 | Multi-account power setup | -More deployments = more total concurrency = lower per-session latency. Each batch round-robins across your IDs, spreading load and reducing the chance of hitting any single deployment's quota ceiling. +More deployments = more total concurrency = lower per-session latency. Each batch is selected from the configured IDs with a local rolling 24-hour ledger, spreading load and steering away from deployments this client has already driven near the free-tier request budget. + +The desktop **Script health** panel shows this local state without changing routing behavior: masked deployment ID, locally observed calls inside the rolling 24-hour window, whether the local free-tier steering threshold is saturated, any remaining cooldown, the failure class/reason that set that cooldown, and the current timeout-strike count. Treat it as client-side telemetry only; Google may also count requests from other devices using the same deployment. **Resource guards:** - **50 ops max** per batch — if more sessions are active, the mux splits into multiple batches @@ -346,7 +348,7 @@ This port focuses on the **`apps_script` mode** — the only one that reliably w - [x] Connection pooling (45 s TTL, max 20 idle) - [x] Gzip response decoding - [x] Multi-script round-robin -- [x] Auto-blacklist failing scripts on 429 / quota errors (10 min cooldown) +- [x] Auto-quarantine failing scripts: quota/account failures for 24 h, transient relay failures for a short cooldown - [x] Response cache (50 MB, FIFO + TTL, `Cache-Control: max-age` aware, heuristics for static assets) - [x] Request coalescing: concurrent identical GETs share one upstream fetch - [x] SNI-rewrite tunnels for `google.com`, `youtube.com`, `youtu.be`, `youtube-nocookie.com`, `fonts.googleapis.com`, configurable via `hosts` map diff --git a/src/bin/ui.rs b/src/bin/ui.rs index c5f9ed63..11a0e546 100644 --- a/src/bin/ui.rs +++ b/src/bin/ui.rs @@ -117,6 +117,7 @@ struct UiState { running: bool, started_at: Option, last_stats: Option, + last_script_health: Vec, last_per_site: Vec<(String, mhrv_rs::domain_fronter::HostStat)>, log: VecDeque, /// Result + timestamp for transient status banners (auto-hide after 10s). @@ -1156,7 +1157,7 @@ impl eframe::App for App { ui.add_space(8.0); // ── Status + stats card ──────────────────────────────────────── - let (running, started_at, stats, ca_trusted, last_test_msg, per_site) = { + let (running, started_at, stats, ca_trusted, last_test_msg, per_site, script_health) = { let s = self.shared.state.lock().unwrap(); ( s.running, @@ -1165,6 +1166,7 @@ impl eframe::App for App { s.ca_trusted, s.last_test_msg.clone(), s.last_per_site.clone(), + s.last_script_health.clone(), ) }; @@ -1318,6 +1320,66 @@ impl eframe::App for App { }); } + if !script_health.is_empty() { + ui.add_space(2.0); + egui::CollapsingHeader::new(format!( + "Script health ({} deployments)", + script_health.len() + )) + .default_open(false) + .show(ui, |ui| { + egui::ScrollArea::vertical() + .max_height(160.0) + .show(ui, |ui| { + egui::Grid::new("script_health") + .num_columns(5) + .spacing([8.0, 2.0]) + .striped(true) + .show(ui, |ui| { + ui.label(egui::RichText::new("script").strong()); + ui.label(egui::RichText::new("quota").strong()); + ui.label(egui::RichText::new("cooldown").strong()); + ui.label(egui::RichText::new("reason").strong()); + ui.label(egui::RichText::new("timeouts").strong()); + ui.end_row(); + + for st in &script_health { + let quota = format!( + "{} / {}{}", + st.quota_used, + st.quota_limit, + if st.quota_saturated { " saturated" } else { "" } + ); + let cooldown = st + .cooldown_secs + .map(fmt_seconds_compact) + .unwrap_or_else(|| "-".to_string()); + let reason = st + .cooldown_reason + .as_deref() + .unwrap_or("-") + .to_string(); + ui.label(egui::RichText::new(&st.script_id).monospace()); + ui.label(egui::RichText::new(quota).monospace()); + ui.label(egui::RichText::new(cooldown).monospace()); + ui.label(egui::RichText::new(reason).small()); + ui.label( + egui::RichText::new(st.timeout_strikes.to_string()) + .monospace(), + ); + ui.end_row(); + } + }); + }); + ui.small( + egui::RichText::new( + "Local view only: Google quota can also be consumed by other clients using the same deployment.", + ) + .color(egui::Color32::from_gray(130)), + ); + }); + } + if !per_site.is_empty() { ui.add_space(2.0); egui::CollapsingHeader::new(format!("Per-site ({} hosts)", per_site.len())) @@ -1949,6 +2011,16 @@ fn fmt_duration(d: Duration) -> String { format!("{:02}:{:02}:{:02}", s / 3600, (s / 60) % 60, s % 60) } +fn fmt_seconds_compact(seconds: u64) -> String { + if seconds >= 3600 { + format!("{}h {}m", seconds / 3600, (seconds / 60) % 60) + } else if seconds >= 60 { + format!("{}m {}s", seconds / 60, seconds % 60) + } else { + format!("{}s", seconds) + } +} + fn fmt_bytes(b: u64) -> String { const K: u64 = 1024; const M: u64 = K * K; @@ -1986,9 +2058,11 @@ fn background_thread(shared: Arc, rx: Receiver) { if let Some(fronter) = f.as_ref() { let s = fronter.snapshot_stats(); let per_site = fronter.snapshot_per_site(); + let script_health = fronter.snapshot_script_health(); let mut st = shared.state.lock().unwrap(); st.last_stats = Some(s); st.last_per_site = per_site; + st.last_script_health = script_health; } }); } @@ -2064,6 +2138,7 @@ fn background_thread(shared: Arc, rx: Receiver) { // or normal shutdown without Cmd::Stop). The // Stop handler clears this too — either is fine. st.proxy_active = false; + st.last_script_health.clear(); } push_log(&shared2, "[ui] proxy stopped"); }); @@ -2094,6 +2169,7 @@ fn background_thread(shared: Arc, rx: Receiver) { st.running = false; st.started_at = None; st.proxy_active = false; + st.last_script_health.clear(); } } @@ -2568,3 +2644,15 @@ fn push_log(shared: &Shared, msg: &str) { s.log.pop_front(); } } + +#[cfg(test)] +mod tests { + use super::fmt_seconds_compact; + + #[test] + fn compact_seconds_formatter_scales_units() { + assert_eq!(fmt_seconds_compact(9), "9s"); + assert_eq!(fmt_seconds_compact(125), "2m 5s"); + assert_eq!(fmt_seconds_compact(3660), "1h 1m"); + } +} diff --git a/src/domain_fronter.rs b/src/domain_fronter.rs index 0e11e764..bb79a2df 100644 --- a/src/domain_fronter.rs +++ b/src/domain_fronter.rs @@ -13,7 +13,7 @@ //! buffered `relay_parallel_range` compatibility wrapper for callers that //! want a `Vec` back. -use std::collections::HashMap; +use std::collections::{HashMap, VecDeque}; // AtomicU64 via portable-atomic: native on 64-bit / armv7, spinlock- // backed on mipsel (MIPS32 has no 64-bit atomic instructions). API // is identical to std::sync::atomic::AtomicU64 so call sites need @@ -147,6 +147,12 @@ const H1_OPEN_TIMEOUT_SECS: u64 = 8; /// request to wake back up — most painful on YouTube / streaming where /// the first chunk after a quiet pause stalls the player. const H1_KEEPALIVE_INTERVAL_SECS: u64 = 240; +/// Conservative local estimate of the Apps Script UrlFetchApp free-tier +/// request budget per deployment account. This is not an authoritative Google +/// quota read; it is a client-side selector guard that avoids concentrating +/// traffic on a deployment this process has already used heavily. +const SCRIPT_QUOTA_FREE_TIER_CALLS: usize = 20_000; +const SCRIPT_QUOTA_WINDOW: Duration = Duration::from_secs(24 * 60 * 60); /// Largest response body Apps Script's `UrlFetchApp` will deliver before /// the script gets killed mid-execution. The hard wire ceiling is ~50 MiB; /// after base64 / envelope overhead and edge variance, the practical raw @@ -357,6 +363,15 @@ pub struct DomainFronter { inflight: Arc>>>>, coalesced: AtomicU64, blacklist: Arc>>, + /// Human-readable cooldown reason keyed by script ID. Kept separate + /// from `blacklist` so the selection hot path still only checks the + /// timestamp map while diagnostics can explain the active cooldown. + script_cooldown_reasons: Arc>>, + /// Per-deployment local call ledger used by `next_script_id` / + /// `next_script_ids` to avoid selecting an already saturated deployment + /// while another configured script still has locally-observed capacity. + /// Entries are pruned on selection against a rolling 24-hour window. + script_quota_ledger: Arc>>>, /// Per-deployment rolling timeout counter. Maps `script_id` → /// `(window_start, strike_count)`. Reset when the window expires /// or when a batch succeeds. Triggers a short-cooldown blacklist @@ -460,7 +475,30 @@ impl HostStat { } } -const BLACKLIST_COOLDOWN_SECS: u64 = 600; +const TRANSIENT_SCRIPT_COOLDOWN_SECS: u64 = 600; +const HARD_SCRIPT_QUARANTINE_SECS: u64 = 24 * 60 * 60; + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum ScriptQuarantine { + Hard, + Transient, +} + +impl ScriptQuarantine { + fn cooldown(self) -> Duration { + match self { + ScriptQuarantine::Hard => Duration::from_secs(HARD_SCRIPT_QUARANTINE_SECS), + ScriptQuarantine::Transient => Duration::from_secs(TRANSIENT_SCRIPT_COOLDOWN_SECS), + } + } + + fn label(self) -> &'static str { + match self { + ScriptQuarantine::Hard => "hard quota/account quarantine", + ScriptQuarantine::Transient => "transient relay cooldown", + } + } +} /// Auto-blacklist defaults are now per-instance fields on `DomainFronter`, /// driven by `Config::auto_blacklist_strikes` / `_window_secs` / @@ -620,6 +658,8 @@ impl DomainFronter { inflight: Arc::new(Mutex::new(HashMap::new())), coalesced: AtomicU64::new(0), blacklist: Arc::new(std::sync::Mutex::new(HashMap::new())), + script_cooldown_reasons: Arc::new(std::sync::Mutex::new(HashMap::new())), + script_quota_ledger: Arc::new(std::sync::Mutex::new(HashMap::new())), script_timeouts: Arc::new(std::sync::Mutex::new(HashMap::new())), relay_calls: AtomicU64::new(0), relay_failures: AtomicU64::new(0), @@ -781,6 +821,43 @@ impl DomainFronter { } } + pub fn snapshot_script_health(&self) -> Vec { + let now = Instant::now(); + let bl = self.blacklist.lock().unwrap(); + let reasons = self.script_cooldown_reasons.lock().unwrap(); + let quota = self.script_quota_ledger.lock().unwrap(); + let timeouts = self.script_timeouts.lock().unwrap(); + + self.script_ids + .iter() + .map(|sid| { + let quota_used = quota + .get(sid) + .map(|calls| { + calls + .iter() + .filter(|at| now.saturating_duration_since(**at) < SCRIPT_QUOTA_WINDOW) + .count() + }) + .unwrap_or(0); + let cooldown_secs = bl + .get(sid) + .map(|until| until.saturating_duration_since(now).as_secs()) + .filter(|secs| *secs > 0); + let timeout_strikes = timeouts.get(sid).map(|(_, strikes)| *strikes).unwrap_or(0); + ScriptHealthSnapshot { + script_id: mask_script_id(sid), + quota_used, + quota_limit: SCRIPT_QUOTA_FREE_TIER_CALLS, + quota_saturated: quota_used >= SCRIPT_QUOTA_FREE_TIER_CALLS, + cooldown_secs, + cooldown_reason: reasons.get(sid).cloned(), + timeout_strikes, + } + }) + .collect() + } + pub fn num_scripts(&self) -> usize { self.script_ids.len() } @@ -802,21 +879,44 @@ impl DomainFronter { let mut bl = self.blacklist.lock().unwrap(); let now = Instant::now(); bl.retain(|_, until| *until > now); - + self.script_cooldown_reasons + .lock() + .unwrap() + .retain(|sid, _| bl.contains_key(sid)); + let mut quota = self.script_quota_ledger.lock().unwrap(); + prune_script_quota_ledger(&mut quota, now); + + let mut saturated_fallback: Option = None; for _ in 0..n { let idx = self.script_idx.fetch_add(1, Ordering::Relaxed); let sid = &self.script_ids[idx % n]; if !bl.contains_key(sid) { - return sid.clone(); + if script_has_local_quota_capacity("a, sid) { + record_script_quota_call_locked(&mut quota, sid, now); + return sid.clone(); + } + saturated_fallback.get_or_insert_with(|| sid.clone()); } } + // If every non-blacklisted deployment is locally saturated, preserve + // connectivity instead of hard-failing. Paid Workspace quotas and + // traffic from other clients are not visible to this process, so this + // ledger is a steering signal, not an authoritative quota gate. + if let Some(sid) = saturated_fallback { + record_script_quota_call_locked(&mut quota, &sid, now); + return sid; + } // All blacklisted: pick whichever comes off cooldown soonest. if let Some((sid, _)) = bl.iter().min_by_key(|(_, t)| **t) { let sid = sid.clone(); bl.remove(&sid); + self.script_cooldown_reasons.lock().unwrap().remove(&sid); + record_script_quota_call_locked(&mut quota, &sid, now); return sid; } - self.script_ids[0].clone() + let sid = self.script_ids[0].clone(); + record_script_quota_call_locked(&mut quota, &sid, now); + sid } /// Pick `want` distinct non-blacklisted script IDs for a parallel fan-out @@ -831,8 +931,15 @@ impl DomainFronter { let mut bl = self.blacklist.lock().unwrap(); let now = Instant::now(); bl.retain(|_, until| *until > now); + self.script_cooldown_reasons + .lock() + .unwrap() + .retain(|sid, _| bl.contains_key(sid)); + let mut quota = self.script_quota_ledger.lock().unwrap(); + prune_script_quota_ledger(&mut quota, now); let mut picked: Vec = Vec::with_capacity(want); + let mut saturated_fallback: Vec = Vec::with_capacity(want); for _ in 0..n { if picked.len() >= want { break; @@ -840,20 +947,31 @@ impl DomainFronter { let idx = self.script_idx.fetch_add(1, Ordering::Relaxed); let sid = &self.script_ids[idx % n]; if !bl.contains_key(sid) && !picked.iter().any(|p| p == sid) { - picked.push(sid.clone()); + if script_has_local_quota_capacity("a, sid) { + picked.push(sid.clone()); + } else if !saturated_fallback.iter().any(|p| p == sid) { + saturated_fallback.push(sid.clone()); + } } } if picked.is_empty() { - picked.push(self.script_ids[0].clone()); + if let Some(sid) = saturated_fallback.into_iter().next() { + picked.push(sid); + } else { + picked.push(self.script_ids[0].clone()); + } + } + for sid in &picked { + record_script_quota_call_locked(&mut quota, sid, now); } picked } - fn blacklist_script(&self, script_id: &str, reason: &str) { + fn quarantine_script(&self, script_id: &str, quarantine: ScriptQuarantine, reason: &str) { self.blacklist_script_for( script_id, - Duration::from_secs(BLACKLIST_COOLDOWN_SECS), - reason, + quarantine.cooldown(), + &format!("{}: {}", quarantine.label(), reason), ); } @@ -861,6 +979,10 @@ impl DomainFronter { let until = Instant::now() + cooldown; let mut bl = self.blacklist.lock().unwrap(); bl.insert(script_id.to_string(), until); + self.script_cooldown_reasons + .lock() + .unwrap() + .insert(script_id.to_string(), reason.to_string()); tracing::warn!( "blacklisted script {} for {}s: {}", mask_script_id(script_id), @@ -2503,8 +2625,12 @@ impl DomainFronter { .chars() .take(200) .collect::(); - if should_blacklist(status, &body_txt) { - self.blacklist_script(&script_id, &format!("HTTP {}", status)); + if let Some(quarantine) = classify_script_failure(status, &body_txt) { + self.quarantine_script( + &script_id, + quarantine, + &format!("HTTP {}", status), + ); } return Err(FronterError::Relay(format!( "Apps Script HTTP {}: {}", @@ -2514,7 +2640,7 @@ impl DomainFronter { return parse_relay_json(&resp_body).map_err(|e| { if let FronterError::Relay(ref msg) = e { if looks_like_quota_error(msg) { - self.blacklist_script(&script_id, msg); + self.quarantine_script(&script_id, ScriptQuarantine::Hard, msg); } } e @@ -2612,8 +2738,12 @@ impl DomainFronter { .chars() .take(200) .collect::(); - if should_blacklist(status, &body_txt) { - self.blacklist_script(&script_id, &format!("HTTP {}", status)); + if let Some(quarantine) = classify_script_failure(status, &body_txt) { + self.quarantine_script( + &script_id, + quarantine, + &format!("HTTP {}", status), + ); } return Err(FronterError::Relay(format!( "Apps Script HTTP {}: {}", @@ -2625,7 +2755,11 @@ impl DomainFronter { Err(e) => { if let FronterError::Relay(ref msg) = e { if looks_like_quota_error(msg) { - self.blacklist_script(&script_id, msg); + self.quarantine_script( + &script_id, + ScriptQuarantine::Hard, + msg, + ); } } Err(e) @@ -3022,8 +3156,8 @@ impl DomainFronter { .chars() .take(200) .collect::(); - if should_blacklist(status, &body_txt) { - self.blacklist_script(script_id, &format!("HTTP {}", status)); + if let Some(quarantine) = classify_script_failure(status, &body_txt) { + self.quarantine_script(script_id, quarantine, &format!("HTTP {}", status)); } return Err(FronterError::Relay(format!( "tunnel HTTP {}: {}", @@ -3212,8 +3346,8 @@ impl DomainFronter { .chars() .take(200) .collect::(); - if should_blacklist(status, &body_txt) { - self.blacklist_script(script_id, &format!("HTTP {}", status)); + if let Some(quarantine) = classify_script_failure(status, &body_txt) { + self.quarantine_script(script_id, quarantine, &format!("HTTP {}", status)); } return Err(FronterError::Relay(format!( "batch tunnel HTTP {}: {}", @@ -3879,6 +4013,40 @@ fn add_random_pad(map: &mut serde_json::Map) { map.insert("_pad".into(), Value::String(B64.encode(&buf))); } +fn prune_script_quota_ledger( + ledger: &mut HashMap>, + now: Instant, +) { + let cutoff = now.checked_sub(SCRIPT_QUOTA_WINDOW).unwrap_or(now); + ledger.retain(|_, calls| { + while calls.front().map(|ts| *ts <= cutoff).unwrap_or(false) { + calls.pop_front(); + } + !calls.is_empty() + }); +} + +fn script_has_local_quota_capacity( + ledger: &HashMap>, + script_id: &str, +) -> bool { + ledger + .get(script_id) + .map(|calls| calls.len() < SCRIPT_QUOTA_FREE_TIER_CALLS) + .unwrap_or(true) +} + +fn record_script_quota_call_locked( + ledger: &mut HashMap>, + script_id: &str, + now: Instant, +) { + ledger + .entry(script_id.to_string()) + .or_default() + .push_back(now); +} + /// "YYYY-MM-DD" of the current Pacific Time date. Used as the daily-reset /// boundary for `today_calls` / `today_bytes` because **Apps Script's /// quota counter resets at midnight Pacific Time, not UTC** — that's @@ -4833,6 +5001,24 @@ pub struct StatsSnapshot { pub h2_disabled: bool, } +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ScriptHealthSnapshot { + /// Masked deployment ID (`prefix...suffix`) safe to render in logs/UI. + pub script_id: String, + /// Locally observed calls inside the rolling 24-hour steering window. + pub quota_used: usize, + /// Local free-tier steering threshold. This is not an authoritative + /// Google-side quota read; it is the client-side selection guard. + pub quota_limit: usize, + pub quota_saturated: bool, + /// Remaining local cooldown, if the deployment is currently sidelined. + pub cooldown_secs: Option, + /// Human-readable failure class/reason used when the cooldown was set. + pub cooldown_reason: Option, + /// Current timeout strikes inside the auto-blacklist rolling window. + pub timeout_strikes: u32, +} + impl StatsSnapshot { pub fn hit_rate(&self) -> f64 { let total = self.cache_hits + self.cache_misses; @@ -4909,11 +5095,17 @@ impl StatsSnapshot { } } -fn should_blacklist(status: u16, body: &str) -> bool { +fn classify_script_failure(status: u16, body: &str) -> Option { if status == 429 || status == 403 { - return true; + return Some(ScriptQuarantine::Hard); + } + if looks_like_quota_error(body) { + return Some(ScriptQuarantine::Hard); } - looks_like_quota_error(body) + if matches!(status, 500 | 502 | 503 | 504) && looks_like_transient_script_error(body) { + return Some(ScriptQuarantine::Transient); + } + None } fn looks_like_quota_error(msg: &str) -> bool { @@ -4930,6 +5122,20 @@ fn looks_like_quota_error(msg: &str) -> bool { || lower.contains("limit exceeded") } +fn looks_like_transient_script_error(msg: &str) -> bool { + let lower = msg.to_ascii_lowercase(); + lower.contains("google") + || lower.contains("apps script") + || lower.contains("script.google") + || lower.contains("googleusercontent") + || lower.contains("gfe") + || lower.contains("backend error") + || lower.contains("service unavailable") + || lower.contains("temporary") + || lower.contains("timeout") + || lower.contains("timed out") +} + fn mask_script_id(id: &str) -> String { let n = id.chars().count(); if n <= 8 { @@ -5050,6 +5256,15 @@ mod tests { use std::task::{Context, Poll}; use tokio::io::{duplex, AsyncRead, AsyncWriteExt, ReadBuf}; + fn find_script_health<'a>( + rows: &'a [ScriptHealthSnapshot], + masked_script_id: &str, + ) -> &'a ScriptHealthSnapshot { + rows.iter() + .find(|row| row.script_id == masked_script_id) + .expect("script health row must exist") + } + // Test fixture for ungraceful TLS close: emit a fixed prefix of bytes // then return io::ErrorKind::UnexpectedEof on the next read. Mirrors // what rustls surfaces when the peer closes TCP without sending a @@ -6369,11 +6584,21 @@ hello"; #[test] fn blacklist_heuristics() { - assert!(should_blacklist(429, "")); - assert!(should_blacklist(403, "quota")); - assert!(should_blacklist(500, "Service invoked too many times per day: urlfetch")); - assert!(!should_blacklist(200, "")); - assert!(!should_blacklist(502, "bad gateway")); + assert_eq!(classify_script_failure(429, ""), Some(ScriptQuarantine::Hard)); + assert_eq!( + classify_script_failure(403, "quota"), + Some(ScriptQuarantine::Hard) + ); + assert_eq!( + classify_script_failure(500, "Service invoked too many times per day: urlfetch"), + Some(ScriptQuarantine::Hard) + ); + assert_eq!(classify_script_failure(502, "bad gateway"), None); + assert_eq!( + classify_script_failure(502, "Google backend error"), + Some(ScriptQuarantine::Transient) + ); + assert_eq!(classify_script_failure(200, ""), None); assert!(looks_like_quota_error("Exception: Service invoked too many times per day")); assert!(looks_like_quota_error( "Exception: Bandbreitenkontingent überschritten: https://example.com. Verringern Sie die Datenübertragungsrate." @@ -6381,6 +6606,63 @@ hello"; assert!(!looks_like_quota_error("bad url")); } + #[test] + fn script_quarantine_durations_match_failure_class() { + assert_eq!( + ScriptQuarantine::Hard.cooldown(), + Duration::from_secs(HARD_SCRIPT_QUARANTINE_SECS) + ); + assert_eq!( + ScriptQuarantine::Transient.cooldown(), + Duration::from_secs(TRANSIENT_SCRIPT_COOLDOWN_SECS) + ); + } + + #[test] + fn script_health_snapshot_exposes_quota_and_cooldown_state() { + let fronter = fronter_for_script_ids(&["SCRIPT_A", "SCRIPT_B"]); + let now = Instant::now(); + seed_script_quota(&fronter, "SCRIPT_A", 3, now); + fronter.blacklist_script_for( + "SCRIPT_A", + Duration::from_secs(600), + "transient relay cooldown: HTTP 502", + ); + fronter.record_timeout_strike("SCRIPT_B"); + + let rows = fronter.snapshot_script_health(); + let a = find_script_health(&rows, "SCRI...PT_A"); + let b = find_script_health(&rows, "SCRI...PT_B"); + + assert_eq!(a.quota_used, 3); + assert_eq!(a.quota_limit, SCRIPT_QUOTA_FREE_TIER_CALLS); + assert!(!a.quota_saturated); + assert!(a.cooldown_secs.is_some_and(|secs| secs <= 600 && secs > 0)); + assert_eq!( + a.cooldown_reason.as_deref(), + Some("transient relay cooldown: HTTP 502") + ); + assert_eq!(b.timeout_strikes, 1); + assert!(b.cooldown_secs.is_none()); + } + + #[test] + fn script_health_snapshot_marks_local_quota_saturation() { + let fronter = fronter_for_script_ids(&["SCRIPT_A"]); + seed_script_quota( + &fronter, + "SCRIPT_A", + SCRIPT_QUOTA_FREE_TIER_CALLS, + Instant::now(), + ); + + let rows = fronter.snapshot_script_health(); + let row = find_script_health(&rows, "SCRI...PT_A"); + + assert_eq!(row.quota_used, SCRIPT_QUOTA_FREE_TIER_CALLS); + assert!(row.quota_saturated); + } + #[test] fn mask_script_id_hides_middle() { assert_eq!(mask_script_id("short"), "***"); @@ -6606,6 +6888,110 @@ hello"; DomainFronter::new(&cfg).expect("test fronter must construct") } + fn fronter_for_script_ids(script_ids: &[&str]) -> DomainFronter { + let script_ids_json = serde_json::to_string(script_ids).unwrap(); + let json = format!( + r#"{{ + "mode": "apps_script", + "google_ip": "127.0.0.1", + "front_domain": "www.google.com", + "script_id": {}, + "auth_key": "test_auth_key", + "listen_host": "127.0.0.1", + "listen_port": 8085, + "log_level": "info", + "verify_ssl": true + }}"#, + script_ids_json + ); + let cfg: Config = serde_json::from_str(&json).unwrap(); + DomainFronter::new(&cfg).expect("test fronter must construct") + } + + fn seed_script_quota(fronter: &DomainFronter, script_id: &str, count: usize, at: Instant) { + let mut ledger = fronter.script_quota_ledger.lock().unwrap(); + ledger + .entry(script_id.to_string()) + .or_default() + .extend(std::iter::repeat(at).take(count)); + } + + #[test] + fn next_script_id_skips_locally_saturated_deployment() { + let fronter = fronter_for_script_ids(&["SCRIPT_A", "SCRIPT_B"]); + seed_script_quota( + &fronter, + "SCRIPT_A", + SCRIPT_QUOTA_FREE_TIER_CALLS, + Instant::now(), + ); + + let selected = fronter.next_script_id(); + + assert_eq!(selected, "SCRIPT_B"); + let ledger = fronter.script_quota_ledger.lock().unwrap(); + assert_eq!( + ledger.get("SCRIPT_B").map(|calls| calls.len()), + Some(1), + "selection must be recorded in the local rolling ledger" + ); + } + + #[test] + fn script_quota_prune_removes_expired_observations() { + let recorded_at = Instant::now(); + let prune_at = recorded_at + .checked_add(SCRIPT_QUOTA_WINDOW + Duration::from_secs(1)) + .expect("test clock must support a 24h monotonic addition"); + let mut ledger = HashMap::new(); + ledger.insert( + "SCRIPT_A".to_string(), + std::iter::repeat(recorded_at) + .take(SCRIPT_QUOTA_FREE_TIER_CALLS) + .collect::>(), + ); + + prune_script_quota_ledger(&mut ledger, prune_at); + + assert!( + ledger.is_empty(), + "rolling quota ledger should discard observations outside the 24h window" + ); + } + + #[test] + fn next_script_id_preserves_connectivity_when_all_scripts_are_locally_saturated() { + let fronter = fronter_for_script_ids(&["SCRIPT_A", "SCRIPT_B"]); + let now = Instant::now(); + seed_script_quota(&fronter, "SCRIPT_A", SCRIPT_QUOTA_FREE_TIER_CALLS, now); + seed_script_quota(&fronter, "SCRIPT_B", SCRIPT_QUOTA_FREE_TIER_CALLS, now); + + let selected = fronter.next_script_id(); + + assert_eq!(selected, "SCRIPT_A"); + let ledger = fronter.script_quota_ledger.lock().unwrap(); + assert_eq!( + ledger.get("SCRIPT_A").map(|calls| calls.len()), + Some(SCRIPT_QUOTA_FREE_TIER_CALLS + 1), + "local saturation is a steering signal, not a hard outage trigger" + ); + } + + #[test] + fn parallel_script_selection_prefers_unsaturated_deployments() { + let fronter = fronter_for_script_ids(&["SCRIPT_A", "SCRIPT_B", "SCRIPT_C"]); + seed_script_quota( + &fronter, + "SCRIPT_A", + SCRIPT_QUOTA_FREE_TIER_CALLS, + Instant::now(), + ); + + let selected = fronter.next_script_ids(2); + + assert_eq!(selected, vec!["SCRIPT_B".to_string(), "SCRIPT_C".to_string()]); + } + #[tokio::test(flavor = "current_thread")] async fn force_http1_disables_h2_at_construction() { // The kill switch: force_http1=true must mark the fronter as