Add Googlebot listing audit (verify)
This commit is contained in:
parent
8d04a67000
commit
aac9108158
8 changed files with 804 additions and 0 deletions
4
verify/.gitignore
vendored
Normal file
4
verify/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
__pycache__/
|
||||
*.pyc
|
||||
.venv/
|
||||
|
||||
28
verify/README.md
Normal file
28
verify/README.md
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
# Verify (crawler-view audit)
|
||||
|
||||
This folder contains a lightweight SEO audit that fetches external listing/profile URLs using a Googlebot User-Agent, then extracts:
|
||||
|
||||
- Booking.com / TripAdvisor: rating, review count, badges (JSON-LD first)
|
||||
- Instagram / Facebook / TikTok: follower count + last post date hints (meta tags)
|
||||
- Policy check: searches page text for "New Year's Eve", "minimum stay", "sold out"
|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
python3 -m venv .venv
|
||||
. .venv/bin/activate
|
||||
pip install -r verify/requirements.txt
|
||||
```
|
||||
|
||||
## Run
|
||||
|
||||
```bash
|
||||
python verify/tools/audit_listings.py --platform auto <url1> <url2>
|
||||
python verify/tools/audit_listings.py --platform auto --include-jsonld <url>
|
||||
```
|
||||
|
||||
To save output for review:
|
||||
|
||||
```bash
|
||||
python verify/tools/audit_listings.py --platform auto <url1> <url2> > verify/results/audit.jsonl
|
||||
```
|
||||
225
verify/ho36/raw/ho36__booking__ho36_query__20260102.html
Normal file
225
verify/ho36/raw/ho36__booking__ho36_query__20260102.html
Normal file
File diff suppressed because one or more lines are too long
BIN
verify/ho36/screenshots/ho36__booking__ho36_query__20260102.png
Normal file
BIN
verify/ho36/screenshots/ho36__booking__ho36_query__20260102.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 93 KiB |
3
verify/requirements.txt
Normal file
3
verify/requirements.txt
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
requests>=2.32.0
|
||||
beautifulsoup4>=4.12.0
|
||||
|
||||
12
verify/results/flaneur_googlebot_audit.jsonl
Normal file
12
verify/results/flaneur_googlebot_audit.jsonl
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
{"url": "https://booking.roomraccoon.fr/le-fl-neur-guesthouse-8346/fr/", "platform": "policy", "fetched_at": "2026-01-02T20:23:18+00:00", "status_code": 200, "final_url": "https://booking.roomraccoon.fr/le-fl-neur-guesthouse-8346/fr/", "ok": true, "error": null, "data": {"content_type": "text/html; charset=UTF-8", "content_length": null, "policy_checks": [{"term": "New Year's Eve", "found": false, "snippets": []}, {"term": "minimum stay", "found": false, "snippets": []}, {"term": "sold out", "found": false, "snippets": []}]}}
|
||||
{"url": "https://leflaneur-guesthouse.com/", "platform": "policy", "fetched_at": "2026-01-02T20:23:19+00:00", "status_code": 200, "final_url": "https://leflaneur-guesthouse.com/", "ok": true, "error": null, "data": {"content_type": "text/html", "content_length": null, "policy_checks": [{"term": "New Year's Eve", "found": false, "snippets": []}, {"term": "minimum stay", "found": false, "snippets": []}, {"term": "sold out", "found": false, "snippets": []}]}}
|
||||
{"url": "https://leflaneur-guesthouse.com/dormir", "platform": "policy", "fetched_at": "2026-01-02T20:23:19+00:00", "status_code": 200, "final_url": "https://leflaneur-guesthouse.com/dormir", "ok": true, "error": null, "data": {"content_type": "text/html", "content_length": null, "policy_checks": [{"term": "New Year's Eve", "found": false, "snippets": []}, {"term": "minimum stay", "found": false, "snippets": []}, {"term": "sold out", "found": false, "snippets": []}]}}
|
||||
{"url": "https://www.booking.com/searchresults.html?ss=Le%20Fl%C3%A2neur%20Guesthouse%20Lyon", "platform": "booking", "fetched_at": "2026-01-02T20:23:20+00:00", "status_code": 202, "final_url": "https://www.booking.com/searchresults.html", "ok": true, "error": null, "data": {"content_type": "text/html", "content_length": "3962", "rating": null, "review_count": null, "badges": [], "jsonld_count": 0, "jsonld_parse_errors": []}}
|
||||
{"url": "https://www.facebook.com/leflaneurlyon", "platform": "facebook", "fetched_at": "2026-01-02T20:23:21+00:00", "status_code": 200, "final_url": "https://m.facebook.com/login/?next=https%3A%2F%2Fm.facebook.com%2Fleflaneurlyon%2F", "ok": true, "error": null, "data": {"content_type": "text/html; charset=utf-8", "content_length": null, "meta_description": "登录 Facebook,与好友、家人和认识的人分享和建立联系。", "follower_count": null, "last_post_date": null, "og_title": "登录 Facebook | Facebook", "og_url": "https://www.facebook.com/"}}
|
||||
{"url": "https://www.google.com/maps/place/Le+Fl%C3%A2neur+Guesthouse/@45.7512135,4.8428045,17z/data=!3m1!5s0x47f4ea4464dcb499:0x7fbb59cd88d1026a!4m9!3m8!1s0x47f4ea446430af35:0xe27846417ed8f4f!5m2!4m1!1i2!8m2!3d45.7512135!4d4.8428045!16s%2Fg%2F11ckqn6t7v", "platform": "policy", "fetched_at": "2026-01-02T20:23:23+00:00", "status_code": 200, "final_url": "https://www.google.com/maps/place/Le+Fl%C3%A2neur+Guesthouse/@45.7512135,4.8428045,17z/data=!3m1!5s0x47f4ea4464dcb499:0x7fbb59cd88d1026a!4m9!3m8!1s0x47f4ea446430af35:0xe27846417ed8f4f!5m2!4m1!1i2!8m2!3d45.7512135!4d4.8428045!16s%2Fg%2F11ckqn6t7v", "ok": true, "error": null, "data": {"content_type": "text/html; charset=UTF-8", "content_length": null, "policy_checks": [{"term": "New Year's Eve", "found": false, "snippets": []}, {"term": "minimum stay", "found": false, "snippets": []}, {"term": "sold out", "found": false, "snippets": []}]}}
|
||||
{"url": "https://www.google.com/maps/search/?api=1&query=Fl%C3%A2neur%20Hostel%20Lyon", "platform": "policy", "fetched_at": "2026-01-02T20:23:25+00:00", "status_code": 200, "final_url": "https://www.google.com/maps/search/?api=1&query=Fl%C3%A2neur%20Hostel%20Lyon", "ok": true, "error": null, "data": {"content_type": "text/html; charset=UTF-8", "content_length": null, "policy_checks": [{"term": "New Year's Eve", "found": false, "snippets": []}, {"term": "minimum stay", "found": false, "snippets": []}, {"term": "sold out", "found": false, "snippets": []}]}}
|
||||
{"url": "https://www.hostelworld.com/hostels/p/100844/le-flaneur-guesthouse/", "platform": "policy", "fetched_at": "2026-01-02T20:23:27+00:00", "status_code": 200, "final_url": "https://www.hostelworld.com/hostels/p/100844/le-flaneur-guesthouse/", "ok": true, "error": null, "data": {"content_type": "text/html;charset=utf-8", "content_length": null, "policy_checks": [{"term": "New Year's Eve", "found": false, "snippets": []}, {"term": "minimum stay", "found": false, "snippets": []}, {"term": "sold out", "found": false, "snippets": []}]}}
|
||||
{"url": "https://www.hostelworld.com/st/hostels/lyon/", "platform": "policy", "fetched_at": "2026-01-02T20:23:28+00:00", "status_code": 200, "final_url": "https://www.hostelworld.com/hostels/europe/france/lyon/", "ok": true, "error": null, "data": {"content_type": "text/html;charset=utf-8", "content_length": null, "policy_checks": [{"term": "New Year's Eve", "found": false, "snippets": []}, {"term": "minimum stay", "found": false, "snippets": []}, {"term": "sold out", "found": false, "snippets": []}]}}
|
||||
{"url": "https://www.instagram.com/leflaneur_gh/", "platform": "instagram", "fetched_at": "2026-01-02T20:23:30+00:00", "status_code": 200, "final_url": "https://www.instagram.com/leflaneur_gh/", "ok": true, "error": null, "data": {"content_type": "text/html; charset=\"utf-8\"", "content_length": null, "meta_description": "2,296 Followers, 889 Following, 742 Posts - Le Flâneur Guesthouse (@leflaneur_gh) on Instagram: \"🍺☕ Bar & café\n🏠Independent / Alternative / Participative /Ecofriendly / Homely hostel in the heart of Guillotière, Lyon📍\"", "follower_count": {"raw": "2,296", "value": 2296}, "last_post_date": null, "og_title": "Le Flâneur Guesthouse (@leflaneur_gh) • Instagram photos and videos", "og_url": "https://www.instagram.com/leflaneur_gh/"}}
|
||||
{"url": "https://www.tiktok.com/search?q=le%20flaneur%20guesthouse%20lyon", "platform": "tiktok", "fetched_at": "2026-01-02T20:23:32+00:00", "status_code": 403, "final_url": "https://www.tiktok.com/search?q=le%20flaneur%20guesthouse%20lyon", "ok": true, "error": null, "data": {"content_type": "text/plain; charset=utf-8", "content_length": "9", "meta_description": null, "follower_count": null, "last_post_date": null, "og_title": null, "og_url": null}}
|
||||
{"url": "https://www.tripadvisor.com/Hotel_Review-g187265-d8778985-Reviews-Le_Flaneur_Guesthouse-Lyon_Rhone_Auvergne_Rhone_Alpes.html", "platform": "tripadvisor", "fetched_at": "2026-01-02T20:23:32+00:00", "status_code": 403, "final_url": "https://www.tripadvisor.com/Hotel_Review-g187265-d8778985-Reviews-Le_Flaneur_Guesthouse-Lyon_Rhone_Auvergne_Rhone_Alpes.html", "ok": true, "error": null, "data": {"content_type": "text/html;charset=utf-8", "content_length": "775", "rating": null, "review_count": null, "badges": [], "jsonld_count": 0, "jsonld_parse_errors": []}}
|
||||
11
verify/results/ho36_googlebot_audit.jsonl
Normal file
11
verify/results/ho36_googlebot_audit.jsonl
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
{"url": "https://ho36lyon.com/", "platform": "policy", "fetched_at": "2026-01-02T20:24:02+00:00", "status_code": 200, "final_url": "https://ho36lyon.com/", "ok": true, "error": null, "data": {"content_type": "text/html; charset=UTF-8", "content_length": "35483", "policy_checks": [{"term": "New Year's Eve", "found": false, "snippets": []}, {"term": "minimum stay", "found": false, "snippets": []}, {"term": "sold out", "found": false, "snippets": []}]}}
|
||||
{"url": "https://ho36lyon.com/en/", "platform": "policy", "fetched_at": "2026-01-02T20:24:04+00:00", "status_code": 200, "final_url": "https://ho36lyon.com/en/", "ok": true, "error": null, "data": {"content_type": "text/html; charset=UTF-8", "content_length": "35743", "policy_checks": [{"term": "New Year's Eve", "found": false, "snippets": []}, {"term": "minimum stay", "found": false, "snippets": []}, {"term": "sold out", "found": false, "snippets": []}]}}
|
||||
{"url": "https://maps.app.goo.gl/vfGnGGQxJBNwvdgX8", "platform": "policy", "fetched_at": "2026-01-02T20:24:05+00:00", "status_code": 200, "final_url": "https://www.google.fr/maps/place/HO36+Hostel+Lyon/@45.7529047,4.8394703,17z/data=!4m9!3m8!1s0x47f4ea44c206c2fd:0xb36a1c20ef67ead4!5m2!4m1!1i2!8m2!3d45.752901!4d4.8420452!16s%2Fg%2F1tnpkbvv?entry=tts&g_ep=EgoyMDI0MTAwMi4xIPu8ASoASAFQAw%3D%3D", "ok": true, "error": null, "data": {"content_type": "text/html; charset=UTF-8", "content_length": null, "policy_checks": [{"term": "New Year's Eve", "found": false, "snippets": []}, {"term": "minimum stay", "found": false, "snippets": []}, {"term": "sold out", "found": false, "snippets": []}]}}
|
||||
{"url": "https://www.booking.com/hotel/fr/ho36-hostel.html", "platform": "booking", "fetched_at": "2026-01-02T20:24:07+00:00", "status_code": 202, "final_url": "https://www.booking.com/hotel/fr/ho36-hostel.html", "ok": true, "error": null, "data": {"content_type": "text/html", "content_length": "3962", "rating": null, "review_count": null, "badges": [], "jsonld_count": 0, "jsonld_parse_errors": []}}
|
||||
{"url": "https://www.facebook.com/ho36hotels/", "platform": "facebook", "fetched_at": "2026-01-02T20:24:07+00:00", "status_code": 200, "final_url": "https://m.facebook.com/login/?next=https%3A%2F%2Fm.facebook.com%2Fho36hotels%2F", "ok": true, "error": null, "data": {"content_type": "text/html; charset=utf-8", "content_length": null, "meta_description": "登录 Facebook,与好友、家人和认识的人分享和建立联系。", "follower_count": null, "last_post_date": null, "og_title": "登录 Facebook | Facebook", "og_url": "https://www.facebook.com/"}}
|
||||
{"url": "https://www.google.com/maps/embed?pb=!1m14!1m8!1m3!1d11135.645489923992!2d4.84204!3d45.7529227!3m2!1i1024!2i768!4f13.1!3m3!1m2!1s0x0%3A0xb36a1c20ef67ead4!2sho36%20Lyon%20Guilloti%C3%A8re!5e0!3m2!1sfr!2sfr!4v1567089009427!5m2!1sfr!2sfr", "platform": "policy", "fetched_at": "2026-01-02T20:24:08+00:00", "status_code": 200, "final_url": "https://www.google.com/maps/embed?pb=!1m14!1m8!1m3!1d11135.645489923992!2d4.84204!3d45.7529227!3m2!1i1024!2i768!4f13.1!3m3!1m2!1s0x0%3A0xb36a1c20ef67ead4!2sho36%20Lyon%20Guilloti%C3%A8re!5e0!3m2!1sfr!2sfr!4v1567089009427!5m2!1sfr!2sfr", "ok": true, "error": null, "data": {"content_type": "text/html; charset=UTF-8", "content_length": null, "policy_checks": [{"term": "New Year's Eve", "found": false, "snippets": []}, {"term": "minimum stay", "found": false, "snippets": []}, {"term": "sold out", "found": false, "snippets": []}]}}
|
||||
{"url": "https://www.google.fr/maps/place/HO36+Hostel+Lyon/@45.7529047,4.8394703,17z/data=!4m9!3m8!1s0x47f4ea44c206c2fd:0xb36a1c20ef67ead4!5m2!4m1!1i2!8m2!3d45.752901!4d4.8420452!16s%2Fg%2F1tnpkbvv?entry=tts", "platform": "policy", "fetched_at": "2026-01-02T20:24:08+00:00", "status_code": 200, "final_url": "https://www.google.fr/maps/place/HO36+Hostel+Lyon/@45.7529047,4.8394703,17z/data=!4m9!3m8!1s0x47f4ea44c206c2fd:0xb36a1c20ef67ead4!5m2!4m1!1i2!8m2!3d45.752901!4d4.8420452!16s%2Fg%2F1tnpkbvv?entry=tts", "ok": true, "error": null, "data": {"content_type": "text/html; charset=UTF-8", "content_length": null, "policy_checks": [{"term": "New Year's Eve", "found": false, "snippets": []}, {"term": "minimum stay", "found": false, "snippets": []}, {"term": "sold out", "found": false, "snippets": []}]}}
|
||||
{"url": "https://www.hostelworld.com/hostels/europe/france/lyon/", "platform": "policy", "fetched_at": "2026-01-02T20:24:10+00:00", "status_code": 200, "final_url": "https://www.hostelworld.com/hostels/europe/france/lyon/", "ok": true, "error": null, "data": {"content_type": "text/html;charset=utf-8", "content_length": null, "policy_checks": [{"term": "New Year's Eve", "found": false, "snippets": []}, {"term": "minimum stay", "found": false, "snippets": []}, {"term": "sold out", "found": false, "snippets": []}]}}
|
||||
{"url": "https://www.hostelworld.com/hostels/p/270217/ho36-hostel/", "platform": "policy", "fetched_at": "2026-01-02T20:24:11+00:00", "status_code": 200, "final_url": "https://www.hostelworld.com/hostels/p/270217/ho36-hostel/", "ok": true, "error": null, "data": {"content_type": "text/html;charset=utf-8", "content_length": null, "policy_checks": [{"term": "New Year's Eve", "found": false, "snippets": []}, {"term": "minimum stay", "found": false, "snippets": []}, {"term": "sold out", "found": false, "snippets": []}]}}
|
||||
{"url": "https://www.instagram.com/ho36hotel_lyon/", "platform": "instagram", "fetched_at": "2026-01-02T20:24:12+00:00", "status_code": 200, "final_url": "https://www.instagram.com/ho36hotel_lyon/", "ok": true, "error": null, "data": {"content_type": "text/html; charset=\"utf-8\"", "content_length": null, "meta_description": "3,247 Followers, 615 Following, 108 Posts - ho36 Lyon (@ho36hotel_lyon) on Instagram: \"💙Ho(s)tel\n⚡️Lieu de vie et rencontres entre voyageurs & locaux ☕️\"", "follower_count": {"raw": "3,247", "value": 3247}, "last_post_date": null, "og_title": "ho36 Lyon (@ho36hotel_lyon) • Instagram photos and videos", "og_url": "https://www.instagram.com/ho36hotel_lyon/"}}
|
||||
{"url": "https://www.tripadvisor.fr/Hotel_Review-g187265-d293643-Reviews-Ho36_Hostel-Lyon_Rhone_Auvergne_Rhone_Alpes.html", "platform": "tripadvisor", "fetched_at": "2026-01-02T20:24:13+00:00", "status_code": 403, "final_url": "https://www.tripadvisor.fr/Hotel_Review-g187265-d293643-Reviews-Ho36_Hostel-Lyon_Rhone_Auvergne_Rhone_Alpes.html", "ok": true, "error": null, "data": {"content_type": "text/html;charset=utf-8", "content_length": "774", "rating": null, "review_count": null, "badges": [], "jsonld_count": 0, "jsonld_parse_errors": []}}
|
||||
521
verify/tools/audit_listings.py
Normal file
521
verify/tools/audit_listings.py
Normal file
|
|
@ -0,0 +1,521 @@
|
|||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from dataclasses import asdict, dataclass
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Iterable, Literal
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
except ImportError as exc: # pragma: no cover
|
||||
raise SystemExit(
|
||||
"Missing dependency 'beautifulsoup4'. Install with:\n"
|
||||
" python3 -m pip install beautifulsoup4\n"
|
||||
"or (recommended) inside a venv:\n"
|
||||
" python3 -m venv .venv && . .venv/bin/activate && pip install beautifulsoup4\n"
|
||||
) from exc
|
||||
|
||||
|
||||
# Closed set of platform identifiers used throughout the audit pipeline.
# "auto" means "detect from the URL host"; "policy" is the generic fallback
# that only runs the policy-term text search.
Platform = Literal[
    "auto",
    "booking",
    "tripadvisor",
    "instagram",
    "facebook",
    "tiktok",
    "policy",
]
|
||||
|
||||
# Smartphone Googlebot User-Agent string, sent with every fetch so pages are
# seen the way Google's crawler would render/serve them.
GOOGLEBOT_UA = (
    "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/41.0.2272.96 Mobile Safari/537.36 "
    "(compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AuditResult:
    """Immutable record of one audited URL; serialized to JSONL via asdict()."""

    url: str  # the URL exactly as given on the command line
    platform: str  # resolved platform name (never "auto")
    fetched_at: str  # ISO-8601 timestamp taken just before the fetch
    status_code: int | None  # HTTP status, or None when the request failed
    final_url: str | None  # URL after redirects, or None on failure
    ok: bool  # False only when the HTTP request itself raised
    error: str | None  # requests exception text when ok is False
    data: dict[str, Any]  # platform-specific extraction payload
|
||||
|
||||
|
||||
def iso_now() -> str:
    """Current time as an ISO-8601 string (seconds precision, local UTC offset)."""
    now_utc = datetime.now(timezone.utc)
    return now_utc.astimezone().isoformat(timespec="seconds")
|
||||
|
||||
|
||||
def detect_platform(url: str) -> Platform:
    """Map a URL's host to a platform name; 'policy' when nothing matches."""
    host = (urlparse(url).netloc or "").lower()
    host_checks = [
        ("booking.com" in host, "booking"),
        ("tripadvisor." in host or "tripadvisor.com" in host, "tripadvisor"),
        ("instagram.com" in host, "instagram"),
        ("facebook.com" in host or host.endswith("fb.com"), "facebook"),
        ("tiktok.com" in host, "tiktok"),
    ]
    for matched, name in host_checks:
        if matched:
            return name
    return "policy"
|
||||
|
||||
|
||||
def normalize_platform(platform: str) -> Platform | None:
    """Map a free-form platform string to its canonical name.

    Empty/auto-ish input maps to "auto"; unknown input maps to None.
    """
    # Strip everything but letters/digits so "Tik Tok", "tik_tok", etc. merge.
    key = re.sub(r"[^a-z0-9]+", "", (platform or "").strip().lower())
    aliases = {
        "": "auto",
        "auto": "auto",
        "detect": "auto",
        "booking": "booking",
        "bookingcom": "booking",
        "tripadvisor": "tripadvisor",
        "tripadvisorcom": "tripadvisor",
        "tripadvisorfr": "tripadvisor",
        "tripadvisoruk": "tripadvisor",
        "instagram": "instagram",
        "insta": "instagram",
        "ig": "instagram",
        "facebook": "facebook",
        "fb": "facebook",
        "tiktok": "tiktok",
        "tik": "tiktok",
        "ticktok": "tiktok",
        "policy": "policy",
        "site": "policy",
        "general": "policy",
    }
    return aliases.get(key)
|
||||
|
||||
|
||||
def normalize_whitespace(text: str) -> str:
    """Collapse every run of whitespace into one space and strip the ends."""
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
|
||||
|
||||
|
||||
def parse_human_number(raw: str) -> int | None:
    """Parse a human-formatted count such as '2,296', '1 234', or '12.5K'.

    Handles thin/no-break spaces, thousands separators (including several,
    e.g. '1,234,567'), a European decimal comma, and K/M/B suffixes.
    Returns the value rounded to an int, or None when the string does not
    look like a number.
    """
    s = raw.strip().replace("\u202f", "").replace("\xa0", "").replace(" ", "")
    # Allow any number of separator groups; the original single-group
    # pattern (?:[.,]\d+)? rejected multi-separator numbers like 1,234,567
    # even though the join branch below was written for them.
    match = re.match(r"^(?P<num>\d+(?:[.,]\d+)*)(?P<suffix>[KkMmBb])?$", s)
    if not match:
        return None

    num_part = match.group("num")
    suffix = (match.group("suffix") or "").upper()

    if "," in num_part and "." in num_part:
        # Mixed separators: treat commas as thousands separators ('1,234.5').
        num_part = num_part.replace(",", "")
    elif "," in num_part:
        parts = num_part.split(",")
        if len(parts) > 1 and len(parts[-1]) == 3:
            # Trailing 3-digit group: commas are thousands separators.
            num_part = "".join(parts)
        else:
            # Otherwise assume a European decimal comma ('1,5' -> 1.5).
            num_part = num_part.replace(",", ".")

    try:
        value = float(num_part)
    except ValueError:
        return None

    # The regex restricts suffix to K/M/B (or empty), so lookup cannot miss.
    multiplier = {"": 1, "K": 1_000, "M": 1_000_000, "B": 1_000_000_000}[suffix]
    return int(round(value * multiplier))
|
||||
|
||||
|
||||
def fetch_url(
    session: requests.Session,
    url: str,
    *,
    timeout_s: float,
    user_agent: str,
) -> tuple[requests.Response | None, str | None]:
    """GET url with the given UA; return (response, None) or (None, error text)."""
    request_headers = {
        "User-Agent": user_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    }
    try:
        response = session.get(
            url,
            headers=request_headers,
            timeout=timeout_s,
            allow_redirects=True,
        )
    except requests.RequestException as exc:
        return None, str(exc)
    return response, None
|
||||
|
||||
|
||||
def collect_meta(soup: BeautifulSoup) -> dict[str, list[str]]:
    """Gather <meta> tags into {lowercased property-or-name: [content, ...]}."""
    collected: dict[str, list[str]] = {}
    for meta_tag in soup.find_all("meta"):
        # property (OpenGraph style) wins over name when both exist.
        name = meta_tag.get("property") or meta_tag.get("name")
        content = meta_tag.get("content")
        if name and content:
            collected.setdefault(name.strip().lower(), []).append(content.strip())
    return collected
|
||||
|
||||
|
||||
def first_meta(meta: dict[str, list[str]], keys: Iterable[str]) -> str | None:
    """Return the first stored value for the first key present in meta."""
    for candidate in keys:
        for value in meta.get(candidate.lower(), []):
            return value
    return None
|
||||
|
||||
|
||||
def clean_jsonld_text(raw: str) -> str:
    """Strip HTML comment and CDATA wrappers some sites put around JSON-LD."""
    cleaned = raw.strip()
    wrapper_patterns = (
        r"^\s*<!--",
        r"-->\s*$",
        r"^\s*/\*+\s*<!\[CDATA\[\s*\*/\s*",
        r"\s*/\*+\s*\]\]>\s*\*/\s*$",
    )
    for pattern in wrapper_patterns:
        cleaned = re.sub(pattern, "", cleaned)
    return cleaned.strip()
|
||||
|
||||
|
||||
def extract_jsonld_objects(soup: BeautifulSoup) -> tuple[list[Any], list[dict[str, str]]]:
    """Parse every <script type="application/ld+json"> block in the page.

    Returns (parsed_objects, parse_errors); each error records the JSON
    decoder message and a 400-char snippet of the offending payload.
    """
    objects: list[Any] = []
    errors: list[dict[str, str]] = []
    # Match the type attribute case-insensitively ("application/LD+JSON" etc.).
    for script in soup.find_all("script", attrs={"type": re.compile(r"^application/ld\+json$", re.I)}):
        # script.string is None when the tag has multiple children; fall back
        # to get_text() in that case.
        raw = script.string or script.get_text() or ""
        raw = clean_jsonld_text(raw)
        if not raw:
            continue
        try:
            objects.append(json.loads(raw))
            continue
        except json.JSONDecodeError:
            pass

        # Fallback for sites that concatenate several JSON objects in one
        # script tag ("{...}{...}"): wrap them into a JSON array and retry.
        # NOTE(review): the regex is a heuristic — it would also rewrite a
        # "}{" sequence occurring inside a string literal.
        try:
            patched = "[" + re.sub(r"}\s*{", "},{", raw) + "]"
            objects.append(json.loads(patched))
        except json.JSONDecodeError as exc:
            errors.append({"error": str(exc), "snippet": raw[:400]})
    return objects, errors
|
||||
|
||||
|
||||
def iter_dicts(obj: Any) -> Iterable[dict[str, Any]]:
    """Yield every dict nested anywhere inside obj, depth-first, each once.

    The original had a special case that yielded the children of "@graph"
    and then recursed into obj.values() as well — which visits "@graph"
    again, yielding every graph node twice. The generic values recursion
    already covers "@graph", so the special case is simply dropped.
    """
    if isinstance(obj, dict):
        yield obj
        for value in obj.values():
            yield from iter_dicts(value)
    elif isinstance(obj, list):
        for item in obj:
            yield from iter_dicts(item)
|
||||
|
||||
|
||||
def coerce_float(value: Any) -> float | None:
    """Best-effort float conversion; accepts decimal commas; None on failure."""
    if isinstance(value, (int, float)):
        return float(value)
    if isinstance(value, str):
        normalized = value.strip().replace(",", ".")
        try:
            return float(normalized)
        except ValueError:
            return None
    # None and any other type are not coercible.
    return None
|
||||
|
||||
|
||||
def coerce_int(value: Any) -> int | None:
    """Best-effort int conversion; strings are reduced to their digits."""
    if value is None:
        return None
    if isinstance(value, int):
        return value
    if isinstance(value, float):
        return int(value)
    if not isinstance(value, str):
        return None
    # Keep digits only, so "1,234 reviews" -> 1234.
    digits = re.sub(r"[^\d]", "", value)
    return int(digits) if digits else None
|
||||
|
||||
|
||||
def pick_best_aggregate_rating(jsonld: list[Any]) -> dict[str, Any] | None:
    """Choose the most credible aggregateRating found anywhere in the JSON-LD.

    Candidates are ranked by a score that weights review count first, then
    rating value. Returns a summary dict (rating_value, review_count,
    best/worst rating, the raw aggregateRating node and its parent @type),
    or None when no usable rating exists.

    Bug fixed: the original sorted tuples whose last element was a dict;
    when two distinct candidates tied on score and rating, tuple comparison
    fell through to the dicts and raised TypeError (dicts are unorderable).
    We now compare on (score, rating_value) only; ties keep the first
    candidate encountered, matching the original stable-sort behavior.
    """
    best_key: tuple[int, float] | None = None
    best_payload: dict[str, Any] | None = None
    for obj in jsonld:
        for node in iter_dicts(obj):
            agg = node.get("aggregateRating")
            if not isinstance(agg, dict):
                continue
            rating_value = coerce_float(agg.get("ratingValue"))
            if rating_value is None:
                continue
            review_count = coerce_int(agg.get("reviewCount") or agg.get("ratingCount"))
            score = (review_count or 0) * 10 + int(round(rating_value * 100))
            key = (score, rating_value)
            if best_key is None or key > best_key:
                best_key = key
                best_payload = {
                    "rating_value": rating_value,
                    "review_count": review_count,
                    "best_rating": coerce_float(agg.get("bestRating")),
                    "worst_rating": coerce_float(agg.get("worstRating")),
                    "source": "jsonld",
                    "aggregate_rating": agg,
                    "parent_types": node.get("@type"),
                }
    return best_payload
|
||||
|
||||
|
||||
def extract_awards(jsonld: list[Any]) -> list[str]:
    """Collect unique 'award'/'awards' strings from JSON-LD, order-preserving.

    Deduplication is case-insensitive; the first spelling seen is kept.
    """
    found: list[str] = []
    seen_lower: set[str] = set()
    for obj in jsonld:
        for node in iter_dicts(obj):
            value = node.get("award") or node.get("awards")
            if isinstance(value, str):
                entries = [value]
            elif isinstance(value, list):
                entries = [entry for entry in value if isinstance(entry, str)]
            else:
                entries = []
            for entry in entries:
                cleaned = normalize_whitespace(entry)
                key = cleaned.lower()
                if cleaned and key not in seen_lower:
                    seen_lower.add(key)
                    found.append(cleaned)
    return found
|
||||
|
||||
|
||||
# Lower-cased badge/award phrases searched for in page text — TripAdvisor and
# Booking.com programme names (both UK and US spellings where they differ).
BADGE_KEYWORDS = [
    "travellers' choice",
    "travelers' choice",
    "traveller review award",
    "traveler review award",
    "greenleaders",
    "green leader",
    "travel sustainable",
    "preferred partner",
    "genius",
    "key collection",
]
|
||||
|
||||
|
||||
def extract_badges_from_html(soup: BeautifulSoup) -> list[str]:
    """Return the sorted, deduplicated badge keywords present in the page text."""
    page_text = normalize_whitespace(soup.get_text(" ", strip=True)).lower()
    matched = {keyword for keyword in BADGE_KEYWORDS if keyword in page_text}
    return sorted(matched)
|
||||
|
||||
|
||||
def extract_followers_from_description(description: str) -> dict[str, Any] | None:
    """Pull a follower/like count out of a social meta description.

    Tries English then French phrasings in priority order. Returns
    {"raw": matched text, "value": parsed int or None}, or None when no
    phrasing matches.
    """
    count_group = r"(?P<count>\d[\d.,\s\u202f\xa0]*[KkMmBb]?)"
    phrasings = [
        count_group + r"\s+followers?\b",
        count_group + r"\s+abonn[eé]s?\b",
        count_group + r"\s+people\s+like\s+this\b",
        count_group + r"\s+likes\b",
    ]
    lowered = description.lower()
    for phrasing in phrasings:
        if match := re.search(phrasing, lowered, flags=re.IGNORECASE):
            raw = match.group("count")
            return {"raw": raw, "value": parse_human_number(raw)}
    return None
|
||||
|
||||
|
||||
# Meta-tag keys checked (in this priority order) for a "last post" date hint.
DATE_META_KEYS = [
    "article:published_time",
    "article:modified_time",
    "og:published_time",
    "og:updated_time",
    "og:video:release_date",
    "last-modified",
]
|
||||
|
||||
|
||||
def parse_first_iso_datetime(values: Iterable[str]) -> str | None:
    """Return the first value parseable as an ISO-8601 datetime, re-serialized.

    Accepts a trailing 'Z' (very common in meta tags) by rewriting it as
    '+00:00' — datetime.fromisoformat only accepts 'Z' from Python 3.11 on.
    Returns None when nothing parses.
    """
    for value in values:
        v = value.strip()
        if v.endswith(("Z", "z")):
            v = v[:-1] + "+00:00"
        try:
            return datetime.fromisoformat(v).isoformat()
        except ValueError:
            continue
    return None
|
||||
|
||||
|
||||
def audit_booking_or_tripadvisor(
    soup: BeautifulSoup,
    *,
    include_jsonld: bool,
) -> dict[str, Any]:
    """Extract rating/review/badge signals from a Booking or TripAdvisor page.

    JSON-LD is the primary source; badges also come from a plain-text scan.
    When include_jsonld is True the parsed JSON-LD blobs are echoed back.
    """
    jsonld, jsonld_errors = extract_jsonld_objects(soup)
    best_rating = pick_best_aggregate_rating(jsonld)
    combined_badges = sorted(set(extract_awards(jsonld) + extract_badges_from_html(soup)))

    result: dict[str, Any] = {
        "rating": best_rating,
        "review_count": best_rating.get("review_count") if best_rating else None,
        "badges": combined_badges,
        "jsonld_count": len(jsonld),
        "jsonld_parse_errors": jsonld_errors,
    }
    if include_jsonld:
        result["jsonld"] = jsonld
    return result
|
||||
|
||||
|
||||
def audit_social(
    soup: BeautifulSoup,
) -> dict[str, Any]:
    """Extract follower count and last-post-date hints from social meta tags."""
    meta = collect_meta(soup)
    description = first_meta(meta, ["description", "og:description", "twitter:description"]) or ""
    followers = extract_followers_from_description(description) if description else None

    raw_date = first_meta(meta, DATE_META_KEYS)
    parsed_date = parse_first_iso_datetime([raw_date]) if raw_date else None
    last_post = {"raw": raw_date, "value": parsed_date} if (raw_date or parsed_date) else None

    return {
        "meta_description": description or None,
        "follower_count": followers,
        "last_post_date": last_post,
        "og_title": first_meta(meta, ["og:title"]),
        "og_url": first_meta(meta, ["og:url"]),
    }
|
||||
|
||||
|
||||
# Phrases searched for in page text by the policy audit; results are
# reported per term with surrounding snippets.
POLICY_TERMS = [
    "New Year's Eve",
    "minimum stay",
    "sold out",
]
|
||||
|
||||
|
||||
def find_term_snippets(text: str, term: str, *, max_hits: int = 3, context: int = 60) -> list[str]:
    """Find up to max_hits occurrences of term, each with `context` chars around it."""
    if term.lower() == "new year's eve":
        # Tolerate curly apostrophes and a missing possessive ("new years eve").
        pattern = r"new\s+year(?:'|\u2019)?s?\s+eve"
    else:
        pattern = re.escape(term)
    finder = re.compile(pattern, flags=re.IGNORECASE)

    snippets: list[str] = []
    for hit in finder.finditer(text):
        window_start = max(0, hit.start() - context)
        window_end = min(len(text), hit.end() + context)
        snippets.append(normalize_whitespace(text[window_start:window_end]))
        if len(snippets) >= max_hits:
            break
    return snippets
|
||||
|
||||
|
||||
def audit_policy(soup: BeautifulSoup) -> dict[str, Any]:
    """Search the page's visible text for each policy term; record hit snippets."""
    page_text = normalize_whitespace(soup.get_text(" ", strip=True))
    checks: list[dict[str, Any]] = []
    for term in POLICY_TERMS:
        hits = find_term_snippets(page_text, term)
        checks.append({"term": term, "found": len(hits) > 0, "snippets": hits})
    return {"policy_checks": checks}
|
||||
|
||||
|
||||
def audit_listing(
    url: str,
    platform: str,
    *,
    timeout_s: float = 25.0,
    user_agent: str = GOOGLEBOT_UA,
    include_jsonld: bool = False,
) -> AuditResult:
    """Fetch one URL with the crawler UA and run the platform-specific audit.

    Parameters
    ----------
    url: the page to audit.
    platform: platform hint; "auto"/unknown values fall back to URL detection.
    timeout_s: per-request timeout in seconds.
    user_agent: User-Agent header to send.
    include_jsonld: include parsed JSON-LD blobs in the payload when True.

    Network errors never raise; they are reported via ok=False and error.

    Fixes over the original: the no-op conditional
    `resp.text if ... else resp.text` (both branches identical) is gone,
    the duplicated policy/else branches are merged, and the Session is
    closed deterministically instead of being left to garbage collection.
    """
    normalized = normalize_platform(platform)
    if normalized is None or normalized == "auto":
        resolved_platform: Platform = detect_platform(url)
    else:
        resolved_platform = normalized
    fetched_at = iso_now()

    with requests.Session() as session:
        resp, error = fetch_url(session, url, timeout_s=timeout_s, user_agent=user_agent)

    if error or resp is None:
        return AuditResult(
            url=url,
            platform=resolved_platform,
            fetched_at=fetched_at,
            status_code=None,
            final_url=None,
            ok=False,
            error=error or "unknown error",
            data={},
        )

    # html.parser copes with whatever came back, HTML or not; the body is
    # already loaded, so closing the session above is safe.
    soup = BeautifulSoup(resp.text, "html.parser")

    data: dict[str, Any] = {
        "content_type": resp.headers.get("Content-Type"),
        "content_length": resp.headers.get("Content-Length"),
    }

    if resolved_platform in ("booking", "tripadvisor"):
        data.update(audit_booking_or_tripadvisor(soup, include_jsonld=include_jsonld))
    elif resolved_platform in ("instagram", "facebook", "tiktok"):
        data.update(audit_social(soup))
    else:
        # "policy" and any future platform fall back to the text search.
        data.update(audit_policy(soup))

    return AuditResult(
        url=url,
        platform=resolved_platform,
        fetched_at=fetched_at,
        status_code=resp.status_code,
        final_url=str(resp.url) if resp.url else None,
        ok=True,
        error=None,
        data=data,
    )
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Define and parse the command-line interface."""
    cli = argparse.ArgumentParser(
        description="Audit listing pages using a Googlebot user-agent (ratings/reviews/badges, followers, policy text)."
    )
    cli.add_argument(
        "--platform",
        default="auto",
        help="Platform hint (auto/booking/tripadvisor/instagram/facebook/tiktok/policy).",
    )
    cli.add_argument("--timeout", type=float, default=25.0)
    cli.add_argument("--user-agent", default=GOOGLEBOT_UA)
    cli.add_argument(
        "--include-jsonld",
        action="store_true",
        help="Include parsed JSON-LD blobs in output.",
    )
    cli.add_argument("urls", nargs="+", help="One or more URLs to audit")
    return cli.parse_args()
|
||||
|
||||
|
||||
def main() -> int:
    """Audit each URL, emit one JSON object per line, return the exit code.

    Exit code is 0 when all fetches succeeded, 2 when any of them failed.
    """
    args = parse_args()
    had_failure = False
    for url in args.urls:
        result = audit_listing(
            url,
            args.platform,
            timeout_s=args.timeout,
            user_agent=args.user_agent,
            include_jsonld=bool(args.include_jsonld),
        )
        # Raw write to fd 1 keeps each record flushed immediately when piped.
        line = json.dumps(asdict(result), ensure_ascii=False) + "\n"
        os.write(1, line.encode("utf-8"))
        if not result.ok:
            had_failure = True
    return 2 if had_failure else 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code.
    raise SystemExit(main())
|
||||
Loading…
Add table
Reference in a new issue