From 3bb213a00dfc466ba931f6c9e52e8216f147c423 Mon Sep 17 00:00:00 2001 From: enzo Date: Wed, 13 May 2026 02:56:16 +0200 Subject: [PATCH] fix(cve): parse HTML instead of non-existent JSON API - Debian Security Tracker has no public JSON API for individual CVEs - Now fetches and parses the HTML page directly - Searches for 'bookworm ... fixed' pattern in the vulnerability table - Cache files changed from .json to .html --- full_updater/backend/scanner.py | 34 ++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/full_updater/backend/scanner.py b/full_updater/backend/scanner.py index 6fd044a..c002ce7 100644 --- a/full_updater/backend/scanner.py +++ b/full_updater/backend/scanner.py @@ -17,37 +17,41 @@ def _ensure_cve_api_cache() -> None: os.makedirs(CVE_API_CACHE, exist_ok=True) -def _fetch_cve_status(cve_id: str) -> dict: - """Interroge l'API Debian Security Tracker pour une CVE, avec cache local.""" +def _fetch_cve_html(cve_id: str) -> str: + """Récupère le HTML de la page Debian Security Tracker pour une CVE.""" _ensure_cve_api_cache() - cache_path = os.path.join(CVE_API_CACHE, f"{cve_id}.json") + cache_path = os.path.join(CVE_API_CACHE, f"{cve_id}.html") if os.path.exists(cache_path): try: with open(cache_path, "r", encoding="utf-8") as f: - return json.load(f) + return f.read() except Exception: pass - url = f"https://security-tracker.debian.org/tracker/{cve_id}/json" + url = f"https://security-tracker.debian.org/tracker/{cve_id}" try: req = urllib.request.Request(url, headers={"User-Agent": "full-updater/1.0"}) with urllib.request.urlopen(req, timeout=15) as resp: - data = json.load(resp) + html = resp.read().decode("utf-8") with open(cache_path, "w", encoding="utf-8") as f: - json.dump(data, f) - return data + f.write(html) + return html except Exception: - return {} + return "" def _is_cve_actionable(cve_id: str, suite: str = "bookworm") -> bool: - """Retourne True si la CVE a un fixed_version pour le suite donné.""" - data = _fetch_cve_status(cve_id) - cve_data = data.get(cve_id, {}) - debian = cve_data.get("debian", {}) - suite_data = debian.get(suite, {}) - return suite_data.get("status") == "resolved" and "fixed_version" in suite_data + """Retourne True si la CVE est marquée 'fixed' pour le suite donné dans le HTML.""" + html = _fetch_cve_html(cve_id) + if not html: + return False + + # Chercher dans le tableau des packages vulnérables/fixed + # Pattern: suite (security)? version fixed + # Ex: bookworm 3.0.18-1~deb12u1 fixed + pattern = re.compile(rf"]*>\s*{re.escape(suite)}\s*(?:\(security\))?\s*\s*]*>[^<]+\s*]*>\s*fixed\s*", re.IGNORECASE) + return bool(pattern.search(html)) def filter_actionable_cves(cves: list[dict]) -> tuple[list[dict], int]: