Sanitize DM reports and add Mermaid tooling

This commit is contained in:
danny 2025-12-24 13:13:48 +00:00
parent a6222083e6
commit a140b3787a
10 changed files with 516 additions and 427 deletions

5
.gitignore vendored
View file

@ -7,4 +7,7 @@ venv/
.vscode/
/dist/
/build/
node_modules/
npm-debug.log*
yarn-debug.log*
yarn-error.log*

View file

@ -103,6 +103,12 @@ This produces the “Sergio persona” artifacts needed for the DM agent:
Outputs are written with mode `600` and may contain sensitive DM content. Keep them out of git.
This repo includes **sanitized** example reports (no verbatim client DMs) under:
- `reports/socialmediatorr/`
Raw analysis artifacts (e.g., training pairs, rescued threads, template caches) should remain in a private working directory such as `/root/tmp/` and should not be committed.
### Analyze a raw Instagram export folder (recommended)
Optional: index first (lets you filter recency without scanning every thread):

174
package-lock.json generated Normal file
View file

@ -0,0 +1,174 @@
{
"name": "emo-social-insta-dm-agent-tools",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "emo-social-insta-dm-agent-tools",
"devDependencies": {
"@mermaid-js/parser": "^0.6.3"
}
},
"node_modules/@chevrotain/cst-dts-gen": {
"version": "11.0.3",
"resolved": "https://registry.npmjs.org/@chevrotain/cst-dts-gen/-/cst-dts-gen-11.0.3.tgz",
"integrity": "sha512-BvIKpRLeS/8UbfxXxgC33xOumsacaeCKAjAeLyOn7Pcp95HiRbrpl14S+9vaZLolnbssPIUuiUd8IvgkRyt6NQ==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"@chevrotain/gast": "11.0.3",
"@chevrotain/types": "11.0.3",
"lodash-es": "4.17.21"
}
},
"node_modules/@chevrotain/gast": {
"version": "11.0.3",
"resolved": "https://registry.npmjs.org/@chevrotain/gast/-/gast-11.0.3.tgz",
"integrity": "sha512-+qNfcoNk70PyS/uxmj3li5NiECO+2YKZZQMbmjTqRI3Qchu8Hig/Q9vgkHpI3alNjr7M+a2St5pw5w5F6NL5/Q==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"@chevrotain/types": "11.0.3",
"lodash-es": "4.17.21"
}
},
"node_modules/@chevrotain/regexp-to-ast": {
"version": "11.0.3",
"resolved": "https://registry.npmjs.org/@chevrotain/regexp-to-ast/-/regexp-to-ast-11.0.3.tgz",
"integrity": "sha512-1fMHaBZxLFvWI067AVbGJav1eRY7N8DDvYCTwGBiE/ytKBgP8azTdgyrKyWZ9Mfh09eHWb5PgTSO8wi7U824RA==",
"dev": true,
"license": "Apache-2.0"
},
"node_modules/@chevrotain/types": {
"version": "11.0.3",
"resolved": "https://registry.npmjs.org/@chevrotain/types/-/types-11.0.3.tgz",
"integrity": "sha512-gsiM3G8b58kZC2HaWR50gu6Y1440cHiJ+i3JUvcp/35JchYejb2+5MVeJK0iKThYpAa/P2PYFV4hoi44HD+aHQ==",
"dev": true,
"license": "Apache-2.0"
},
"node_modules/@chevrotain/utils": {
"version": "11.0.3",
"resolved": "https://registry.npmjs.org/@chevrotain/utils/-/utils-11.0.3.tgz",
"integrity": "sha512-YslZMgtJUyuMbZ+aKvfF3x1f5liK4mWNxghFRv7jqRR9C3R3fAOGTTKvxXDa2Y1s9zSbcpuO0cAxDYsc9SrXoQ==",
"dev": true,
"license": "Apache-2.0"
},
"node_modules/@mermaid-js/parser": {
"version": "0.6.3",
"resolved": "https://registry.npmjs.org/@mermaid-js/parser/-/parser-0.6.3.tgz",
"integrity": "sha512-lnjOhe7zyHjc+If7yT4zoedx2vo4sHaTmtkl1+or8BRTnCtDmcTpAjpzDSfCZrshM5bCoz0GyidzadJAH1xobA==",
"dev": true,
"license": "MIT",
"dependencies": {
"langium": "3.3.1"
}
},
"node_modules/chevrotain": {
"version": "11.0.3",
"resolved": "https://registry.npmjs.org/chevrotain/-/chevrotain-11.0.3.tgz",
"integrity": "sha512-ci2iJH6LeIkvP9eJW6gpueU8cnZhv85ELY8w8WiFtNjMHA5ad6pQLaJo9mEly/9qUyCpvqX8/POVUTf18/HFdw==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"@chevrotain/cst-dts-gen": "11.0.3",
"@chevrotain/gast": "11.0.3",
"@chevrotain/regexp-to-ast": "11.0.3",
"@chevrotain/types": "11.0.3",
"@chevrotain/utils": "11.0.3",
"lodash-es": "4.17.21"
}
},
"node_modules/chevrotain-allstar": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/chevrotain-allstar/-/chevrotain-allstar-0.3.1.tgz",
"integrity": "sha512-b7g+y9A0v4mxCW1qUhf3BSVPg+/NvGErk/dOkrDaHA0nQIQGAtrOjlX//9OQtRlSCy+x9rfB5N8yC71lH1nvMw==",
"dev": true,
"license": "MIT",
"dependencies": {
"lodash-es": "^4.17.21"
},
"peerDependencies": {
"chevrotain": "^11.0.0"
}
},
"node_modules/langium": {
"version": "3.3.1",
"resolved": "https://registry.npmjs.org/langium/-/langium-3.3.1.tgz",
"integrity": "sha512-QJv/h939gDpvT+9SiLVlY7tZC3xB2qK57v0J04Sh9wpMb6MP1q8gB21L3WIo8T5P1MSMg3Ep14L7KkDCFG3y4w==",
"dev": true,
"license": "MIT",
"dependencies": {
"chevrotain": "~11.0.3",
"chevrotain-allstar": "~0.3.0",
"vscode-languageserver": "~9.0.1",
"vscode-languageserver-textdocument": "~1.0.11",
"vscode-uri": "~3.0.8"
},
"engines": {
"node": ">=16.0.0"
}
},
"node_modules/lodash-es": {
"version": "4.17.21",
"resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.17.21.tgz",
"integrity": "sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw==",
"dev": true,
"license": "MIT"
},
"node_modules/vscode-jsonrpc": {
"version": "8.2.0",
"resolved": "https://registry.npmjs.org/vscode-jsonrpc/-/vscode-jsonrpc-8.2.0.tgz",
"integrity": "sha512-C+r0eKJUIfiDIfwJhria30+TYWPtuHJXHtI7J0YlOmKAo7ogxP20T0zxB7HZQIFhIyvoBPwWskjxrvAtfjyZfA==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=14.0.0"
}
},
"node_modules/vscode-languageserver": {
"version": "9.0.1",
"resolved": "https://registry.npmjs.org/vscode-languageserver/-/vscode-languageserver-9.0.1.tgz",
"integrity": "sha512-woByF3PDpkHFUreUa7Hos7+pUWdeWMXRd26+ZX2A8cFx6v/JPTtd4/uN0/jB6XQHYaOlHbio03NTHCqrgG5n7g==",
"dev": true,
"license": "MIT",
"dependencies": {
"vscode-languageserver-protocol": "3.17.5"
},
"bin": {
"installServerIntoExtension": "bin/installServerIntoExtension"
}
},
"node_modules/vscode-languageserver-protocol": {
"version": "3.17.5",
"resolved": "https://registry.npmjs.org/vscode-languageserver-protocol/-/vscode-languageserver-protocol-3.17.5.tgz",
"integrity": "sha512-mb1bvRJN8SVznADSGWM9u/b07H7Ecg0I3OgXDuLdn307rl/J3A9YD6/eYOssqhecL27hK1IPZAsaqh00i/Jljg==",
"dev": true,
"license": "MIT",
"dependencies": {
"vscode-jsonrpc": "8.2.0",
"vscode-languageserver-types": "3.17.5"
}
},
"node_modules/vscode-languageserver-textdocument": {
"version": "1.0.12",
"resolved": "https://registry.npmjs.org/vscode-languageserver-textdocument/-/vscode-languageserver-textdocument-1.0.12.tgz",
"integrity": "sha512-cxWNPesCnQCcMPeenjKKsOCKQZ/L6Tv19DTRIGuLWe32lyzWhihGVJ/rcckZXJxfdKCFvRLS3fpBIsV/ZGX4zA==",
"dev": true,
"license": "MIT"
},
"node_modules/vscode-languageserver-types": {
"version": "3.17.5",
"resolved": "https://registry.npmjs.org/vscode-languageserver-types/-/vscode-languageserver-types-3.17.5.tgz",
"integrity": "sha512-Ld1VelNuX9pdF39h2Hgaeb5hEZM2Z3jUrrMgWQAu82jMtZp7p3vJT3BzToKtZI7NgQssZje5o0zryOrhQvzQAg==",
"dev": true,
"license": "MIT"
},
"node_modules/vscode-uri": {
"version": "3.0.8",
"resolved": "https://registry.npmjs.org/vscode-uri/-/vscode-uri-3.0.8.tgz",
"integrity": "sha512-AyFQ0EVmsOZOlAnxoFOGOq1SQDWAB7C6aqMGS23svWAllfOaxbuFvcT8D1i8z3Gyn8fraVeZNNmN6e9bxxXkKw==",
"dev": true,
"license": "MIT"
}
}
}

11
package.json Normal file
View file

@ -0,0 +1,11 @@
{
"name": "emo-social-insta-dm-agent-tools",
"private": true,
"type": "module",
"devDependencies": {
"@mermaid-js/parser": "^0.6.3"
},
"scripts": {
"verify:mermaid": "node tools/verify_mermaid.mjs"
}
}

View file

@ -1,163 +1,42 @@
# Socialmediatorr Instagram DM History — Human Readable Report (English)
# Instagram DM History — Short Report (English)
- Generated: `2025-12-24T02:28:34+00:00`
- Owner name used: `Sergio de Vocht`
- Inbox: `@socialmediatorr`
## 1) What This Dataset Represents
## What This Is
This is an all-time audit of Instagram DM conversations for `@socialmediatorr`, focused on extracting repeatable sales + support behavior so an AI agent can reply in Sergio's style.
The analysis treats the account as a hybrid system: frequent repeated templates (likely automation/scripts) plus lower-frequency custom replies (human Sergio).
This is a short, plain-English summary of the DM history scan.
It avoids quoting private messages and it avoids storing personal identities.
## 2) High-Level Metrics (All-Time)
## Key Numbers
- Conversations analyzed: **10,061**
- Bot-only conversations: **1,883** (18.7%)
- Human-intervened conversations: **8,153** (81.0%)
- Conversion (intent signals): **1,923** (19.1%)
- Conversion (confirmed signals): **55** (0.5%)
- Conversations that stayed template-only: **1,883** (18.7%)
- Conversations that included custom replies: **8,153** (81.0%)
- Buying/booking signals (weak): **1,923** (19.1%)
- Buying/booking signals (strong): **55** (0.5%)
Notes on conversion: this uses heuristics (keywords + payment/link mentions). It is directionally useful for ranking scripts, but it is not a ground-truth revenue ledger.
Buying/booking signals are detected from text patterns (they are not a payment ledger).
## 3) Sergio Persona (From Manual/Hybrid Replies)
## What You Need to Know
- Typical reply length: median **60.0** chars (p90 **67.0**)
- Questions: **2.4%** | Exclamations: **1.7%** | Emoji: **0.0%**
- Language guess (manual replies): en=8043, es=423, unknown=224
The fastest improvements come from standardizing answers to repeated questions and sending them in the right time blocks.
For the full deep report (CET timing, day-of-week patterns, Top 20 questions, and concrete actions), read:
Practical implication for an agent: short, direct replies; minimal punctuation; bilingual capability; low/no emoji usage.
- `reports/socialmediatorr/dm_history_report_en_detailed.md`
## 4) Bot vs Human Segmentation (What It Means)
## Useful Inventory (Safe Counts Only)
- **[BOT]** = outgoing message template repeated frequently (>= configured threshold).
- **[MANUAL]** = outgoing message that is rare/unique (<= configured threshold).
- **[HYBRID]** = messages that look like a bot template but with manual edits (prefix match/similarity).
- Total outgoing templates detected: **8,550**
- High-frequency repeat templates: **24**
- “Rescue” events detected: **7**
- Training pairs (user → reply) available: **524**
This separation is the foundation for: (1) extracting safe reusable scripts, and (2) extracting human-only replies as training data for a RAG or fine-tune.
## What You Do Not Need to Know
## 5) Top Detected Script Templates (Canonicalized)
- BOT #1: sent **2495**×`crees que es necesario hoy en dã a llevar a cabo este desarrollo colectivo`
- BOT #2: sent **2483**×`perfecto aquã no hablamos de â mejorar solo a uno mismoâ sino de algo mucho mã s profundo estar bien con los demã s para poder estar bien contigo mism`
- BOT #3: sent **2483**×`te lo dejo por aquã dame un minuto`
- BOT #4: sent **2483**×`me gustarã a saber tu opiniã³n`
- BOT #5: sent **1878**×`me alegro de que quieras seguir aprendiendo ð ª te dejo por aquã el ebook â <NUM> conceptos de desarrollo personal que te estã n quitando paz`
- BOT #6: sent **1878**×`no es para que lo leas en modo teorã a sino para que puedas detectar ideas que llevas tiempo aplicando y que sin darte cuenta estã n influyendo en tus relacione`
- BOT #7: sent **706**×`gracias por ese feedback ð`
- BOT #8: sent **706**×`como agradecimiento por seguirme quiero regalarte un video exclusivo que te ayude a empezar este cambio dime â dã³nde sientes que hay mã s conflicto ãºltimament`
- BOT #9: sent **680**×`you sent a private reply to a comment on your instagram post`
- BOT #10: sent **469**×`por cierto`
## 6) Human Reply Library (Rare/Manual Examples, Canonicalized)
- MANUAL-ish #1: seen **10**×`quã bonito leer eso a veces entender las palabras abre puertas nuevas â sientes que en tu entorno hay algo que te gustarã a armonizar mã s`
- MANUAL-ish #2: seen **7**×`buenas aquã sergio ð gracias por responder he preparado un video con muchã simo valor para ayudarte a acabar con esos desbordes y vivir en paz solo tienes que c`
- MANUAL-ish #3: seen **5**×`hola buenas como estas ð espero que estã s bien me gustarã a saber que es lo q te ha echo estar aquã y querer saber mã s sobre nuestras formaciã³n`
- MANUAL-ish #4: seen **5**×`y si pudieras resolver esto cã³mo crees que cambiarã a tu forma de relacionarte o sentirte`
- MANUAL-ish #5: seen **5**×`para conocerte un poquito mã s que te gustarã a conseguir con emosocial cual es tu mayor desafã o actualmente dentro de tus relaciones`
- MANUAL-ish #6: seen **4**×`okey te entiendo perfectamente ð segãºn lo que me comentas creo que esta lista de videos de youtube te va a venir genial para empezar a entender las bases del c`
- MANUAL-ish #7: seen **4**×`buenas aquã sergio ð gracias por responder he preparado un video con muchã simo valor para ayudarte a acabar con esos desbordes y vivir en paz solo tienes que c`
- MANUAL-ish #8: seen **3**×`hola buenas como estas espero que bien cuã ntame que te parece el contenido que estamos ofreciendo por whatsapp te leoð ð`
## 7) Bot Template Performance (Reply/Conversion Heuristics)
These come from `bot_performance_audit.csv` and are computed per canonical bot template.
### Most-used bot templates (by volume)
- sent=2495 reply_rate=0.376 intent_rate=0.0766 confirmed_rate=0.012 — `crees que es necesario hoy en dã a llevar a cabo este desarrollo colectivo`
- sent=2483 reply_rate=0.0334 intent_rate=0.0769 confirmed_rate=0.0121 — `perfecto aquã no hablamos de â mejorar solo a uno mismoâ sino de algo mucho mã s profundo estar bien con los demã s para poder estar bien co`
- sent=2483 reply_rate=0.1188 intent_rate=0.0769 confirmed_rate=0.0121 — `te lo dejo por aquã dame un minuto`
- sent=2483 reply_rate=0.0028 intent_rate=0.0769 confirmed_rate=0.0121 — `me gustarã a saber tu opiniã³n`
- sent=1878 reply_rate=0.0 intent_rate=0.0 confirmed_rate=0.0005 — `me alegro de que quieras seguir aprendiendo ð ª te dejo por aquã el ebook â <NUM> conceptos de desarrollo personal que te estã n quitando pa`
- sent=1878 reply_rate=0.1768 intent_rate=0.0 confirmed_rate=0.0005 — `no es para que lo leas en modo teorã a sino para que puedas detectar ideas que llevas tiempo aplicando y que sin darte cuenta estã n influye`
- sent=706 reply_rate=0.0042 intent_rate=0.1048 confirmed_rate=0.017 — `gracias por ese feedback ð`
- sent=706 reply_rate=0.8187 intent_rate=0.1048 confirmed_rate=0.017 — `como agradecimiento por seguirme quiero regalarte un video exclusivo que te ayude a empezar este cambio dime â dã³nde sientes que hay mã s c`
### Best reply-rate bot templates
- reply_rate=0.8187 sent=706 — `como agradecimiento por seguirme quiero regalarte un video exclusivo que te ayude a empezar este cambio dime â dã³nde sientes que hay mã s c`
- reply_rate=0.7143 sent=98 — `pudiste entrar correctamente`
- reply_rate=0.7022 sent=178 — `por favor toca una de las siguientes opciones ð`
- reply_rate=0.4701 sent=134 — `pudiste verlo`
- reply_rate=0.4602 sent=176 — `que te pareciã³ ese diccionario hay alguna palabra que sueles utilizar y no te habã as dado cuenta`
- reply_rate=0.376 sent=2495 — `crees que es necesario hoy en dã a llevar a cabo este desarrollo colectivo`
- reply_rate=0.3458 sent=240 — `gracias por tu sinceridad ð`
- reply_rate=0.3291 sent=158 — `te dejo este video donde explico por quã las relaciones de pareja entran en conflicto aunque haya amor`
### Worst reply-rate bot templates
- reply_rate=0.0 sent=1878 — `me alegro de que quieras seguir aprendiendo ð ª te dejo por aquã el ebook â <NUM> conceptos de desarrollo personal que te estã n quitando pa`
- reply_rate=0.0 sent=337 — `enhorabuena por querer dar ese cambio estã s a un paso de transformar tu relaciã³n en solo <NUM> dã as te invito a un taller exclusivo donde`
- reply_rate=0.0 sent=158 — `gracias por compartirlo â ï`
- reply_rate=0.0 sent=131 — `entiendo perfectamente ð`
- reply_rate=0.0 sent=54 — `this account can t receive your message because they don t allow new message requests from everyone`
- reply_rate=0.0028 sent=2483 — `me gustarã a saber tu opiniã³n`
- reply_rate=0.0042 sent=706 — `gracias por ese feedback ð`
- reply_rate=0.0334 sent=2483 — `perfecto aquã no hablamos de â mejorar solo a uno mismoâ sino de algo mucho mã s profundo estar bien con los demã s para poder estar bien co`
## 8) Objections → Best Sergio Replies (Playbook)
### price
- (1) Ey Alex que tal
- (1) Qué bonito leer eso. A veces entender las palabras abre puertas nuevas. ¿Sientes que en tu entorno hay algo que te gustaría armonizar más?
- (1) Y que es lo que te impide dar ese cambio? Te veo con mucha seguridad
### time
- (1) Brutal esto que dices
- (1) No es una herida ELA! Apego que no te dieron tus padres es solo una parte del espectro, necesitamos validación del mundo y de forma constante, no es una herida del pasado es algo que falta darnos en el presente.
- (1) Vaya, suena bastante frustrante el hecho de querer "bajar esa guardia", y sentir que cuando lo haces, todo cambia
### trust
- (2) Hola Dani, gracias por el mensaje bonito, de verdad. Me alegra mucho saber que el contenido te está ayudando a mirar las cosas desde otro punto de vista
- (2) Qué bonito leer eso. A veces entender las palabras abre puertas nuevas. ¿Sientes que en tu entorno hay algo que te gustaría armonizar más?
- (2) En la plataforma no sale por ningún lugar, y normalmente siempre llegan 2 emails, 1 de confirmación de pago y otro de bienvenida
## 9) Rescue / Save Logic (Human Intervention After Silence/Negativity)
- Rescue events detected (heuristic): **7**
A “rescue” is when a manual/hybrid owner message follows either (a) a user negative signal, or (b) >24h silence after a bot message, and the thread later shows a confirmed conversion signal.
## 10) Product / Offer Evolution (Eras)
This is inferred from mentions of pricing/currency + offer terms (e.g., call/audit/coaching) and summarized quarterly.
Recent quarters (top extracted offer signals):
- stripe(1)
- book(1912); ebook(1912); call(8); calendly(7); coaching(2); stripe(2); pdf(2); paypal(1)
## 11) Charts
- Bot fatigue (weekly reply rate to the dominant bot script): `bot_fatigue_chart.png`
![](bot_fatigue_chart.png)
- Editorial timeline (top bot scripts vs conversions): `editorial_timeline.png`
![](editorial_timeline.png)
## 12) What To Build From This (Agent Requirements)
### Core behavior
- Start with top bot templates for predictable openers and FAQ-style flows.
- Switch to Sergio-style manual patterns on objections, negotiation, or when conversation stalls.
- Use a rescue cadence (time-based triggers) after silence.
### Data products to drive the agent
- Training pairs (manual-only, converted threads): `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/training_pairs.jsonl` (rows: ~524)
- Objection handlers: `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/objection_handlers.json`
- Rescue playbook: `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/rescue_playbook.json`
- Script templates + editorial drift: `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/top_outgoing_templates.json`
### Safety boundaries (recommended)
- Never request or store passwords/2FA codes.
- Avoid medical/legal/financial advice; redirect to a call or a human.
- If a user asks to move off-platform, follow Sergio's historical policy and business rules.
## 13) What We Do NOT Need To Know (Ignore / Do Not Store)
- Exact client identities (names, handles, phone numbers, emails) unless required for operational routing.
- Media attachments (photos/videos/audio) for persona cloning; they add storage cost and privacy risk.
- Full verbatim message dumps for every thread; for RAG you only need high-quality pairs and playbook snippets.
- Individual one-off edge cases that never repeat (unless they represent a safety boundary).
- Internal Meta export folder structure details beyond `messages/inbox/**/message*.json`.
## 14) Caveats / Gaps
- The export does not reliably label ManyChat vs Human; bot/human is inferred by repetition and similarity.
- Conversion is heuristic; integrate Stripe/Calendly/CRM events if you want ground-truth attribution.
- Language detection is heuristic; improve it if you need precise bilingual routing.
Do not store or copy these into an automation system unless you have a clear operational reason:
- Names, handles, phone numbers, emails.
- Full conversation transcripts for every thread.
- Photos, videos, audio, and other attachments.
- One-off edge cases that never repeat.

View file

@ -1,25 +1,22 @@
# Socialmediatorr Instagram DM History : Plain-English Deep Report
# Instagram DM History — Plain-English Deep Report
## DM History Deep Report
## What This Is
**Subject:** Instagram direct messages for `@socialmediatorr`
**Version:** v1.0 (STYLE BIBLE EN 3.0GM)
**Inbox:** `@socialmediatorr`
**Date:** 2025-12-24
**Status:** REVIEW REQUIRED
**Citation:** `if://report/socialmediatorr/instagram/dm-history/`
**Author:** Danny Stocker | InfraFabric Research
**Time zone used:** CET
### How This Report Was Made
> This is an automated count of patterns. It is not a therapy note and it is not a sales ledger.
> This is a count of patterns. It is not a therapy note and it is not a sales ledger.
This document was generated by reading an Instagram data export and counting repeat patterns over time. It avoids quoting private client messages and it avoids storing personal identities.
---
**Context:** This inbox contains a high-volume message-and-reply system over 429 days.
**Context:** This inbox contains message history over 429 days.
> Your messaging system is working as a volume engine. The weak point is consistency at the moments where people ask to buy or book.
> The system works at scale. The weak point is the “next step” moments: when people ask what to do, what it costs, or where to get it.
The purpose of this report is practical: define what to keep, what to remove, and what to automate safely—without damaging trust.
@ -35,7 +32,7 @@ Across the observed window, you sent a very large number of messages and you rec
| Total messages | 54,069 | Instagram export |
| Messages you sent | 43,607 | Instagram export |
| Messages people sent you | 10,462 | Instagram export |
| Messages that look like a question or a request | 2,713 | Instagram export |
| Messages that look like a question or a request | 2,715 | Instagram export |
| System messages about new followers (auto text in the inbox) | 8,081 | Instagram export |
### What You Need to Know (In Plain English)
@ -67,7 +64,7 @@ To avoid guesswork, we start with 3-month blocks (a simple way to smooth noise),
| 2025 Jan-Mar | 21 | 0 | 0 |
| 2025 Apr-Jun | 92 | 97 | 15 |
| 2025 Jul-Sep | 623 | 882 | 89 |
| 2025 Oct-Dec | 9,712 | 42,628 | 2,609 |
| 2025 Oct-Dec | 9,712 | 42,628 | 2,611 |
Same data as charts:
@ -112,8 +109,8 @@ This month-by-month table is the clearest view of how the inbox changed over tim
| 2025-08 | 193 | 230 | 28 | 50.0% |
| 2025-09 | 284 | 330 | 24 | 20.8% |
| 2025-10 | 787 | 1,190 | 64 | 17.2% |
| 2025-11 | 854 | 2,194 | 149 | 46.3% |
| 2025-12 | 8,071 | 39,244 | 2,396 | 89.6% |
| 2025-11 | 854 | 2,194 | 150 | 46.7% |
| 2025-12 | 8,071 | 39,244 | 2,397 | 89.7% |
The busiest month was **2025-12** with **47,315** messages total (87.5% of everything in this export). That single month dominates the shape of the data.
@ -126,7 +123,7 @@ Use this to time follow-ups and first messages. Do not spread effort evenly acro
| Day of week | Messages from people | Messages you sent | Questions/requests |
|---|---:|---:|---:|
| Monday | 1,600 | 8,359 | 131 |
| Tuesday | 1,939 | 9,654 | 192 |
| Tuesday | 1,939 | 9,654 | 194 |
| Wednesday | 1,282 | 5,554 | 159 |
| Thursday | 2,261 | 6,908 | 1,268 |
| Friday | 1,705 | 5,733 | 803 |
@ -184,7 +181,7 @@ One caution: “fast replies” are often repeat messages. This section shows ov
| Typical time to reply to questions/requests | 2 seconds | Instagram export |
| Slow end for questions/requests (90% are faster) | 4 seconds | Instagram export |
| Messages from people answered within 48 hours | 7,467 (71.4%) | Instagram export |
| Questions/requests answered within 48 hours | 2,278 (84.0%) | Instagram export |
| Questions/requests answered within 48 hours | 2,280 (84.0%) | Instagram export |
Breakdown by message type (repeat messages vs custom messages):
@ -242,11 +239,11 @@ This list is grouped by meaning (not by exact wording). It includes very short r
| Rank | Topic (plain English) | Count | Share of all questions/requests |
|---:|---|---:|---:|
| 1 | Just one word: book | 1,857 | 68.4% |
| 2 | What is this? | 206 | 7.6% |
| 3 | Can you send the video? | 191 | 7.0% |
| 4 | Other question | 120 | 4.4% |
| 2 | What is this? | 203 | 7.5% |
| 3 | Can you send the video? | 189 | 7.0% |
| 4 | Other question | 118 | 4.3% |
| 5 | Can you help me? | 74 | 2.7% |
| 6 | Can you send the link? | 61 | 2.2% |
| 6 | Can you send the link? | 70 | 2.6% |
| 7 | What does it cost? | 53 | 2.0% |
| 8 | Is this therapy? | 44 | 1.6% |
| 9 | Where do I get the book? | 36 | 1.3% |
@ -261,12 +258,12 @@ This list is grouped by meaning (not by exact wording). It includes very short r
| 18 | Can I get a refund? | 1 | 0.0% |
| 19 | How long does it take? | 1 | 0.0% |
In plain terms: **1,893** of **2,713** questions/requests are about the book (69.8%).
In plain terms: **1,893** of **2,715** questions/requests are about the book (69.7%).
```mermaid
pie title Questions/Requests: Book vs Everything Else
"Book" : 1893
"Everything else" : 820
"Everything else" : 822
```
### Content Patterns (What You Mention When You Sell)

View file

@ -1,10 +1,8 @@
from __future__ import annotations
import argparse
import csv
import json
import os
import statistics
from dataclasses import dataclass
from pathlib import Path
from typing import Any
@ -21,11 +19,6 @@ def _load_json(path: Path) -> dict[str, Any]:
return json.loads(path.read_text(encoding="utf-8", errors="replace"))
def _read_csv(path: Path) -> list[dict[str, str]]:
with path.open("r", encoding="utf-8", newline="") as f:
return list(csv.DictReader(f))
def _count_jsonl(path: Path, *, max_lines: int = 5_000_000) -> int:
n = 0
with path.open("r", encoding="utf-8", errors="replace") as f:
@ -36,284 +29,115 @@ def _count_jsonl(path: Path, *, max_lines: int = 5_000_000) -> int:
return n
def _pct(x: float) -> str:
return f"{x*100:.1f}%"
def _pct(num: int, den: int) -> str:
return "n/a" if den <= 0 else f"{(num/den)*100:.1f}%"
@dataclass(frozen=True)
class ReportInputs:
    """Immutable bundle of the artifact paths one report-generation run reads.

    All paths are expected to live inside a single analysis output directory;
    whether each file actually exists is checked by the caller, not here.
    """

    summary: Path         # summary.json — top-level metrics for the run
    templates: Path       # top_outgoing_templates.json — canonical outgoing templates
    bot_audit: Path       # bot_performance_audit.csv — per-template reply/conversion rates
    objections: Path      # objection_handlers.json — replies grouped by objection category
    rescue: Path          # rescue_playbook.json — detected rescue events
    eras: Path            # sergio_eras.csv — quarterly offer-signal summary
    training_pairs: Path  # training_pairs.jsonl — user -> reply training pairs
    fatigue_png: Path     # bot_fatigue_chart.png — weekly reply-rate chart
    editorial_png: Path   # editorial_timeline.png — scripts-vs-conversions chart
def _resolve_inputs(analysis_dir: Path) -> ReportInputs:
    """Map an analysis output directory onto its well-known artifact filenames.

    Purely constructs paths; no filesystem access or existence checks happen here.
    """
    base = analysis_dir
    return ReportInputs(
        summary=base / "summary.json",
        templates=base / "top_outgoing_templates.json",
        bot_audit=base / "bot_performance_audit.csv",
        objections=base / "objection_handlers.json",
        rescue=base / "rescue_playbook.json",
        eras=base / "sergio_eras.csv",
        training_pairs=base / "training_pairs.jsonl",
        fatigue_png=base / "bot_fatigue_chart.png",
        editorial_png=base / "editorial_timeline.png",
    )
def generate_report(*, analysis_dir: Path, out_path: Path) -> Path:
inp = _resolve_inputs(analysis_dir)
for p in inp.__dict__.values():
if not Path(p).exists():
raise FileNotFoundError(str(p))
if not inp.summary.exists():
raise FileNotFoundError(str(inp.summary))
summary = _load_json(inp.summary)
templates = _load_json(inp.templates)
objections = _load_json(inp.objections)
rescues = _load_json(inp.rescue)
bot_audit = _read_csv(inp.bot_audit)
owner = summary.get("owner_name") or "Unknown"
conv = summary.get("conversations") or {}
conv_total = int(conv.get("total") or 0)
bot_only = int(conv.get("bot_only") or 0)
human = int(conv.get("human_intervened") or 0)
conversions = summary.get("conversions") or {}
conv_intent = int(conversions.get("intent") or 0)
conv_confirmed = int(conversions.get("confirmed") or 0)
template_only = int(conv.get("bot_only") or 0)
custom_replies = int(conv.get("human_intervened") or 0)
bot_only_rate = (bot_only / conv_total) if conv_total else 0.0
human_rate = (human / conv_total) if conv_total else 0.0
intent_rate = (conv_intent / conv_total) if conv_total else 0.0
confirmed_rate = (conv_confirmed / conv_total) if conv_total else 0.0
buying = summary.get("conversions") or {}
buying_weak = int(buying.get("intent") or 0)
buying_strong = int(buying.get("confirmed") or 0)
manual_style = summary.get("manual_style") or {}
median_len = manual_style.get("median_len_chars")
p90_len = manual_style.get("p90_len_chars")
question_rate = float(manual_style.get("question_rate") or 0.0)
exclaim_rate = float(manual_style.get("exclaim_rate") or 0.0)
emoji_rate = float(manual_style.get("emoji_rate") or 0.0)
lang_guess = manual_style.get("lang_guess") or {}
templates_total = None
templates_repeat = None
if inp.templates.exists():
t = _load_json(inp.templates)
templates_total = int(t.get("templates_total") or 0)
templates_repeat = int(t.get("bot_templates") or 0)
# Templates: prefer canonical strings (safe-ish) and avoid raw samples.
top_templates = templates.get("top_templates") or []
top_bot = [t for t in top_templates if isinstance(t, dict) and t.get("label_hint") == "bot"]
top_manual = [t for t in top_templates if isinstance(t, dict) and t.get("label_hint") == "manual"]
# Bot audit: best/worst by reply_rate.
def fnum(v: str | None) -> float:
rescue_count = None
if inp.rescue.exists():
try:
return float(v or 0)
rescue = _load_json(inp.rescue)
rescue_count = len(rescue) if isinstance(rescue, list) else 0
except Exception:
return 0.0
rescue_count = None
bot_audit_sorted = sorted(bot_audit, key=lambda r: fnum(r.get("sent")), reverse=True)
top_audit = bot_audit_sorted[:10]
best_reply = sorted(bot_audit, key=lambda r: fnum(r.get("reply_rate")), reverse=True)[:10]
worst_reply = sorted(bot_audit, key=lambda r: fnum(r.get("reply_rate")))[:10]
pairs_count = _count_jsonl(inp.training_pairs, max_lines=2_000_000) if inp.training_pairs.exists() else None
# Objections: most common replies per category.
objection_blocks: list[str] = []
if isinstance(objections, dict):
for cat in ("price", "time", "trust", "stop"):
replies = objections.get(cat) or []
if not isinstance(replies, list) or not replies:
continue
top3 = []
for r in replies[:3]:
if not isinstance(r, dict):
continue
top3.append(f"- ({r.get('count')}) {r.get('reply')}")
if top3:
objection_blocks.append(f"### {cat}\n" + "\n".join(top3))
generated_at = summary.get("generated_at") if isinstance(summary.get("generated_at"), str) else None
rescue_count = len(rescues) if isinstance(rescues, list) else 0
pairs_count = _count_jsonl(inp.training_pairs, max_lines=2_000_000)
# Era summary: simple high-level notes.
eras_rows = _read_csv(inp.eras)
era_recent = eras_rows[-6:] if len(eras_rows) > 6 else eras_rows
era_offer_terms: list[str] = []
for row in era_recent:
offers = (row.get("top_offers") or "").strip()
if offers:
era_offer_terms.append(offers)
# A few derived notes.
lang_line = ", ".join(f"{k}={v}" for k, v in lang_guess.items())
# Summarize bot fatigue trend from image existence only (analysis already made it).
report = []
report.append("# Socialmediatorr Instagram DM History — Human Readable Report (English)")
report: list[str] = []
report.append("# Instagram DM History — Short Report (English)")
report.append("")
report.append(f"- Generated: `{summary.get('generated_at')}`")
report.append(f"- Owner name used: `{owner}`")
if generated_at:
report.append(f"- Generated: `{generated_at}`")
report.append("- Inbox: `@socialmediatorr`")
report.append("")
report.append("## 1) What This Dataset Represents")
report.append("## What This Is")
report.append("")
report.append(
"This is an all-time audit of Instagram DM conversations for `@socialmediatorr`, focused on extracting repeatable sales + support behavior so an AI agent can reply in Sergios style."
)
report.append(
"The analysis treats the account as a hybrid system: frequent repeated templates (likely automation/scripts) plus lower-frequency custom replies (human Sergio)."
)
report.append("This is a short, plain-English summary of the DM history scan.")
report.append("It avoids quoting private messages and it avoids storing personal identities.")
report.append("")
report.append("## 2) High-Level Metrics (All-Time)")
report.append("## Key Numbers")
report.append("")
report.append(f"- Conversations analyzed: **{conv_total:,}**")
report.append(f"- Bot-only conversations: **{bot_only:,}** ({_pct(bot_only_rate)})")
report.append(f"- Human-intervened conversations: **{human:,}** ({_pct(human_rate)})")
report.append(f"- Conversion (intent signals): **{conv_intent:,}** ({_pct(intent_rate)})")
report.append(f"- Conversion (confirmed signals): **{conv_confirmed:,}** ({_pct(confirmed_rate)})")
report.append(f"- Conversations that stayed template-only: **{template_only:,}** ({_pct(template_only, conv_total)})")
report.append(f"- Conversations that included custom replies: **{custom_replies:,}** ({_pct(custom_replies, conv_total)})")
report.append(f"- Buying/booking signals (weak): **{buying_weak:,}** ({_pct(buying_weak, conv_total)})")
report.append(f"- Buying/booking signals (strong): **{buying_strong:,}** ({_pct(buying_strong, conv_total)})")
report.append("")
report.append(
"Notes on conversion: this uses heuristics (keywords + payment/link mentions). It is directionally useful for ranking scripts, but it is not a ground-truth revenue ledger."
)
report.append("Buying/booking signals are detected from text patterns (they are not a payment ledger).")
report.append("")
report.append("## 3) Sergio Persona (From Manual/Hybrid Replies)")
report.append("## What You Need to Know")
report.append("")
report.append(f"- Typical reply length: median **{median_len}** chars (p90 **{p90_len}**)")
report.append(f"- Questions: **{_pct(question_rate)}** | Exclamations: **{_pct(exclaim_rate)}** | Emoji: **{_pct(emoji_rate)}**")
report.append(f"- Language guess (manual replies): {lang_line or 'n/a'}")
report.append("The fastest improvements come from standardizing answers to repeated questions and sending them in the right time blocks.")
report.append("For the full deep report (CET timing, day-of-week patterns, Top 20 questions, and concrete actions), read:")
report.append("")
report.append("Practical implication for an agent: short, direct replies; minimal punctuation; bilingual capability; low/no emoji usage.")
report.append("- `reports/socialmediatorr/dm_history_report_en_detailed.md`")
report.append("")
report.append("## 4) Bot vs Human Segmentation (What It Means)")
report.append("## Useful Inventory (Safe Counts Only)")
report.append("")
report.append(
"- **[BOT]** = outgoing message template repeated frequently (>= configured threshold).\n"
"- **[MANUAL]** = outgoing message that is rare/unique (<= configured threshold).\n"
"- **[HYBRID]** = messages that look like a bot template but with manual edits (prefix match/similarity)."
)
report.append("")
report.append(
"This separation is the foundation for: (1) extracting safe reusable scripts, and (2) extracting human-only replies as training data for a RAG or fine-tune."
)
if templates_total is not None and templates_repeat is not None:
report.append(f"- Total outgoing templates detected: **{templates_total:,}**")
report.append(f"- High-frequency repeat templates: **{templates_repeat:,}**")
if rescue_count is not None:
report.append(f"- “Rescue” events detected: **{rescue_count:,}**")
if pairs_count is not None:
report.append(f"- Training pairs (user → reply) available: **{pairs_count:,}**")
if templates_total is None and rescue_count is None and pairs_count is None:
report.append("- (No additional artifacts were found next to `summary.json`.)")
report.append("")
report.append("## 5) Top Detected Script Templates (Canonicalized)")
report.append("## What You Do Not Need to Know")
report.append("")
if top_bot:
for i, t in enumerate(top_bot[:10], 1):
canon = (t.get("canonical") or "").strip()
count = int(t.get("count") or 0)
report.append(f"- BOT #{i}: sent **{count}**× — `{canon[:160]}`")
else:
report.append("- (No high-frequency bot templates detected with current thresholds.)")
report.append("")
report.append("## 6) Human Reply Library (Rare/Manual Examples, Canonicalized)")
report.append("")
if top_manual:
for i, t in enumerate(top_manual[:10], 1):
canon = (t.get("canonical") or "").strip()
count = int(t.get("count") or 0)
report.append(f"- MANUAL-ish #{i}: seen **{count}**× — `{canon[:160]}`")
else:
report.append("- (No low-frequency manual templates included in the cached top list.)")
report.append("")
report.append("## 7) Bot Template Performance (Reply/Conversion Heuristics)")
report.append("")
report.append("These come from `bot_performance_audit.csv` and are computed per canonical bot template.")
report.append("")
if top_audit:
report.append("### Most-used bot templates (by volume)")
for r in top_audit[:8]:
report.append(
f"- sent={r.get('sent')} reply_rate={r.get('reply_rate')} intent_rate={r.get('conversion_intent_rate')} confirmed_rate={r.get('conversion_confirmed_rate')} — `{(r.get('canonical_template') or '')[:140]}`"
)
report.append("")
if best_reply:
report.append("### Best reply-rate bot templates")
for r in best_reply[:8]:
report.append(f"- reply_rate={r.get('reply_rate')} sent={r.get('sent')} — `{(r.get('canonical_template') or '')[:140]}`")
report.append("")
if worst_reply:
report.append("### Worst reply-rate bot templates")
for r in worst_reply[:8]:
report.append(f"- reply_rate={r.get('reply_rate')} sent={r.get('sent')} — `{(r.get('canonical_template') or '')[:140]}`")
report.append("")
report.append("## 8) Objections → Best Sergio Replies (Playbook)")
report.append("")
if objection_blocks:
report.extend(objection_blocks)
else:
report.append("- No objection handlers detected with current keyword rules.")
report.append("")
report.append("## 9) Rescue / Save Logic (Human Intervention After Silence/Negativity)")
report.append("")
report.append(f"- Rescue events detected (heuristic): **{rescue_count:,}**")
report.append(
"A “rescue” is when a manual/hybrid owner message follows either (a) a user negative signal, or (b) >24h silence after a bot message, and the thread later shows a confirmed conversion signal."
)
report.append("")
report.append("## 10) Product / Offer Evolution (Eras)")
report.append("")
report.append(
"This is inferred from mentions of pricing/currency + offer terms (e.g., call/audit/coaching) and summarized quarterly."
)
report.append("")
if era_offer_terms:
report.append("Recent quarters (top extracted offer signals):")
for line in era_offer_terms:
report.append(f"- {line}")
else:
report.append("- No offer signals detected in the most recent quarters with current extraction rules.")
report.append("")
report.append("## 11) Charts")
report.append("")
report.append(f"- Bot fatigue (weekly reply rate to the dominant bot script): `{inp.fatigue_png}`")
report.append(f"- Editorial timeline (top bot scripts vs conversions): `{inp.editorial_png}`")
report.append("")
report.append("## 12) What To Build From This (Agent Requirements)")
report.append("")
report.append("### Core behavior")
report.append("- Start with top bot templates for predictable openers and FAQ-style flows.")
report.append("- Switch to Sergio-style manual patterns on objections, negotiation, or when conversation stalls.")
report.append("- Use a rescue cadence (time-based triggers) after silence.")
report.append("")
report.append("### Data products to drive the agent")
report.append(f"- Training pairs (manual-only, converted threads): `{inp.training_pairs}` (rows: ~{pairs_count:,})")
report.append(f"- Objection handlers: `{inp.objections}`")
report.append(f"- Rescue playbook: `{inp.rescue}`")
report.append(f"- Script templates + editorial drift: `{inp.templates}`")
report.append("")
report.append("### Safety boundaries (recommended)")
report.append("- Never request or store passwords/2FA codes.")
report.append("- Avoid medical/legal/financial advice; redirect to a call or a human.")
report.append("- If user asks to move off-platform, follow Sergios historical policy and business rules.")
report.append("")
report.append("## 13) What We Do NOT Need To Know (Ignore / Do Not Store)")
report.append("")
report.append("- Exact client identities (names, handles, phone numbers, emails) unless required for operational routing.")
report.append("- Media attachments (photos/videos/audio) for persona cloning; they add storage cost and privacy risk.")
report.append("- Full verbatim message dumps for every thread; for RAG you only need high-quality pairs and playbook snippets.")
report.append("- Individual one-off edge cases that never repeat (unless they represent a safety boundary).")
report.append("- Internal Meta export folder structure details beyond `messages/inbox/**/message*.json`.")
report.append("")
report.append("## 14) Caveats / Gaps")
report.append("")
report.append("- The export does not reliably label ManyChat vs Human; bot/human is inferred by repetition and similarity.")
report.append("- Conversion is heuristic; integrate Stripe/Calendly/CRM events if you want ground-truth attribution.")
report.append("- Language detection is heuristic; improve it if you need precise bilingual routing.")
report.append("Do not store or copy these into an automation system unless you have a clear operational reason:")
report.append("- Names, handles, phone numbers, emails.")
report.append("- Full conversation transcripts for every thread.")
report.append("- Photos, videos, audio, and other attachments.")
report.append("- One-off edge cases that never repeat.")
report.append("")
out_path.parent.mkdir(parents=True, exist_ok=True)
@ -323,19 +147,20 @@ def generate_report(*, analysis_dir: Path, out_path: Path) -> Path:
def main(argv: list[str] | None = None) -> int:
ap = argparse.ArgumentParser(description="Generate a human-readable English report from analyze_instagram_export outputs.")
ap.add_argument("--analysis-dir", required=True, help="directory produced by analyze_instagram_export (contains summary.json)")
ap.add_argument("--out", default=None, help="output markdown path (default: <analysis-dir>/dm_history_report_en.md)")
ap = argparse.ArgumentParser(description="Generate a short, safe DM history report from an analysis directory.")
ap.add_argument("--analysis-dir", required=True, help="analyze_instagram_export output directory")
ap.add_argument("--out", default=None, help="output markdown path (default: dm_history_report_en.md in CWD)")
args = ap.parse_args(argv)
analysis_dir = Path(args.analysis_dir)
out_path = Path(args.out) if args.out else (analysis_dir / "dm_history_report_en.md")
out_path = Path(args.out) if args.out else (Path.cwd() / "dm_history_report_en.md")
try:
p = generate_report(analysis_dir=analysis_dir, out_path=out_path)
print(json.dumps({"ok": True, "out": str(p)}, ensure_ascii=False))
return 0
except FileNotFoundError as e:
print(f"Missing required input: {e}", file=os.sys.stderr)
print(f"Missing analysis input: {e}", file=os.sys.stderr)
return 2
except Exception as e:
print(f"Report generation failed: {e}", file=os.sys.stderr)

View file

@ -13,7 +13,7 @@ from typing import Any, Iterable, Literal
from .analyze_instagram_export import canonicalize_text
DEFAULT_LOCAL_TZ_NAME = "Europe/Brussels"
DEFAULT_LOCAL_TZ_NAME = "Europe/Paris"
def _safe_chmod_600(path: Path) -> None:
@ -418,11 +418,11 @@ def _question_theme(text: str) -> str | None:
toks = s_compact.split()
if len(toks) == 1:
w = toks[0]
if w in {"book", "ebook", "libro", "pdf"}:
if w in {"book", "ebook", "libro", "pdf", "livre", "llibre"}:
return "Just one word: book"
if w in {"link", "enlace"}:
if w in {"link", "enlace", "lien", "enllac", "enllaç"}:
return "Just one word: link"
if w in {"price", "precio", "cost"}:
if w in {"price", "precio", "cost", "prix", "preu"}:
return "Just one word: price"
# "I tried, but it didn't arrive / it doesn't work"
@ -499,15 +499,48 @@ def _question_theme(text: str) -> str | None:
return "Where are you based?"
# Price / cost
if any(k in s for k in ("price", "cost", "how much", "$", "", "usd", "eur", "precio", "cuanto", "cuánto", "caro")):
if any(
k in s
for k in (
"price",
"cost",
"how much",
"$",
"",
"usd",
"eur",
"precio",
"cuanto",
"cuánto",
"caro",
"prix",
"preu",
)
):
return "What does it cost?"
# Link / payment link
if any(k in s for k in ("link", "send the link", "send me the link", "where is the link", "enlace", "stripe", "paypal", "checkout", "invoice")):
if any(
k in s
for k in (
"link",
"send the link",
"send me the link",
"where is the link",
"enlace",
"lien",
"enllaç",
"enllac",
"stripe",
"paypal",
"checkout",
"invoice",
)
):
return "Can you send the link?"
# Book / ebook / pdf
if any(k in s for k in ("book", "ebook", "e-book", "pdf", "libro")):
if any(k in s for k in ("book", "ebook", "e-book", "pdf", "libro", "livre", "llibre")):
return "Where do I get the book?"
# Call / schedule
@ -533,7 +566,7 @@ def _question_theme(text: str) -> str | None:
return "How do I book a call?"
# Video
if any(k in s for k in ("video", "vídeo", "youtube")):
if any(k in s for k in ("video", "vídeo", "vidéo", "youtube")):
return "Can you send the video?"
# Steps / what next
@ -541,11 +574,45 @@ def _question_theme(text: str) -> str | None:
return "What are the steps?"
# How it works / details
if any(k in s for k in ("how does", "how it works", "how does it work", "how does this work", "como funciona", "cómo funciona", "more info", "details", "explain")):
if any(
k in s
for k in (
"how does",
"how it works",
"how does it work",
"how does this work",
"como funciona",
"cómo funciona",
"more info",
"details",
"explain",
"comment ça marche",
"ça marche",
"com funciona",
)
):
return "How does it work?"
# What you do / what is this
if any(k in s for k in ("what is this", "what do you do", "what is it", "what do you offer", "service", "services", "que es", "qué es", "que haces", "qué haces", "de que va", "de qué va")):
if any(
k in s
for k in (
"what is this",
"what do you do",
"what is it",
"what do you offer",
"service",
"services",
"que es",
"qué es",
"que haces",
"qué haces",
"de que va",
"de qué va",
"c'est quoi",
"cest quoi",
)
):
return "What is this?"
# Trust / legitimacy
@ -597,6 +664,8 @@ def _offer_terms(text: str) -> set[str]:
("ebook", "Ebook"),
("e-book", "Ebook"),
("libro", "Book"),
("livre", "Book"),
("llibre", "Book"),
("pdf", "PDF"),
("call", "Call"),
("llamada", "Call"),
@ -935,20 +1004,17 @@ def generate_report(
now = datetime.now(timezone.utc).date().isoformat()
report: list[str] = []
report.append("# Socialmediatorr Instagram DM History : Plain-English Deep Report")
report.append("# Instagram DM History — Plain-English Deep Report")
report.append("")
report.append("## DM History Deep Report")
report.append("## What This Is")
report.append("")
report.append(f"**Subject:** Instagram direct messages for `@socialmediatorr`")
report.append("**Version:** v1.0 (STYLE BIBLE EN 3.0GM)")
report.append(f"**Inbox:** `@socialmediatorr`")
report.append(f"**Date:** {now}")
report.append("**Status:** REVIEW REQUIRED")
report.append("**Citation:** `if://report/socialmediatorr/instagram/dm-history/`")
report.append("**Author:** Danny Stocker | InfraFabric Research")
report.append("**Time zone used:** CET")
report.append("")
report.append("### How This Report Was Made")
report.append("")
report.append("> This is an automated count of patterns. It is not a therapy note and it is not a sales ledger.")
report.append("> This is a count of patterns. It is not a therapy note and it is not a sales ledger.")
report.append("")
report.append(
"This document was generated by reading an Instagram data export and counting repeat patterns over time. "
@ -957,9 +1023,9 @@ def generate_report(
report.append("")
report.append("---")
report.append("")
report.append(f"**Context:** This inbox contains a high-volume message-and-reply system over {window_days} days.")
report.append(f"**Context:** This inbox contains message history over {window_days} days.")
report.append("")
report.append("> Your messaging system is working as a volume engine. The weak point is consistency at the moments where people ask to buy or book.")
report.append("> The system works at scale. The weak point is the “next step” moments: when people ask what to do, what it costs, or where to get it.")
report.append("")
report.append(
"The purpose of this report is practical: define what to keep, what to remove, and what to automate safely—without damaging trust."

19
tools/README.md Normal file
View file

@ -0,0 +1,19 @@
# Tools
## Mermaid checks (Markdown diagrams)
This repo uses Mermaid diagrams in Markdown reports.
Local lint (partial):
- `npm install`
- `npm run verify:mermaid`
Notes:
- This check validates diagram types supported by `@mermaid-js/parser` (for example: `pie`).
- Some diagram types (for example: `flowchart`) are not supported by that parser yet and will be reported as `skipped`.
Full validation (recommended):
- Use Forgejo's built-in PDF export for the report file. If the PDF export succeeds, the diagrams compiled successfully.

109
tools/verify_mermaid.mjs Normal file
View file

@ -0,0 +1,109 @@
import fs from "node:fs";
import path from "node:path";
import process from "node:process";
import { parse } from "@mermaid-js/parser";
/**
 * Report whether a path looks like a Markdown document.
 * The check is case-insensitive and purely extension-based.
 *
 * @param {string} filePath - file path (absolute or relative)
 * @returns {boolean} true for `.md` / `.markdown` files
 */
function isMarkdownFile(filePath) {
  const normalized = filePath.toLowerCase();
  return [".md", ".markdown"].some((ext) => normalized.endsWith(ext));
}
/**
 * Recursively yield every regular file under `rootPath`.
 * If `rootPath` is itself a file, yield just that path.
 *
 * Entries that are neither files nor directories (e.g. symlinks as reported
 * by Dirent) are ignored. Throws if `rootPath` does not exist.
 *
 * @param {string} rootPath - file or directory to walk
 * @yields {string} path of each file found
 */
function* walkFiles(rootPath) {
  if (fs.statSync(rootPath).isFile()) {
    yield rootPath;
    return;
  }
  for (const entry of fs.readdirSync(rootPath, { withFileTypes: true })) {
    const childPath = path.join(rootPath, entry.name);
    if (entry.isDirectory()) {
      yield* walkFiles(childPath);
    } else if (entry.isFile()) {
      yield childPath;
    }
  }
}
/**
 * Extract the inner source of every ```mermaid fenced code block.
 *
 * The pattern is non-greedy, so adjacent blocks are matched separately.
 * A fresh regex literal is used on each call, so there is no shared
 * `lastIndex` state between invocations.
 *
 * @param {string} markdownText - full Markdown document text
 * @returns {string[]} one entry per mermaid block (may be empty strings)
 */
function extractMermaidBlocks(markdownText) {
  const fencePattern = /```mermaid\s*([\s\S]*?)```/g;
  return Array.from(markdownText.matchAll(fencePattern), (match) => match[1] ?? "");
}
/**
 * Guess the mermaid diagram type from the first meaningful line of a block.
 *
 * Blank lines and `%%` comment lines are ignored. Prefix matching (rather
 * than exact token matching) is intentional so variants such as
 * `architecture-beta` or `packet-beta` are still recognized.
 *
 * @param {string|null|undefined} code - mermaid source of one block
 * @returns {string|null} a type supported by @mermaid-js/parser, or null
 *   for unsupported types (e.g. flowchart/sequence/class) and empty input
 */
function detectDiagramType(code) {
  const meaningfulLines = String(code || "")
    .replace(/\r\n?/g, "\n")
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line !== "" && !line.startsWith("%%"));
  if (meaningfulLines.length === 0) return null;

  const header = meaningfulLines[0];
  const supportedPrefixes = ["pie", "gitGraph", "architecture", "packet", "info", "radar", "treemap"];
  // Not supported by @mermaid-js/parser yet (example: flowchart/sequence/class).
  return supportedPrefixes.find((prefix) => header.startsWith(prefix)) ?? null;
}
/**
 * Scan Markdown files under the given roots (CLI args; default: `reports`)
 * and syntax-check every ```mermaid fenced block whose diagram type is
 * supported by @mermaid-js/parser.
 *
 * On success prints a JSON summary to stdout; on failure prints one line
 * per bad diagram plus a JSON summary to stderr.
 *
 * @returns {Promise<number>} exit code: 0 = all parsed, 1 = any failure
 */
async function main() {
  const args = process.argv.slice(2);
  const roots = args.length ? args : ["reports"];
  let ok = true;
  let total = 0;
  let failures = 0;
  let skipped = 0;
  for (const root of roots) {
    // Materialize the walk per root so a missing/unreadable root is reported
    // cleanly instead of rejecting main() with an unhandled fs exception.
    let filePaths;
    try {
      filePaths = [...walkFiles(root)];
    } catch (err) {
      ok = false;
      console.error(`[mermaid] cannot read root "${root}": ${errorMessage(err)}`);
      continue;
    }
    for (const filePath of filePaths) {
      if (!isMarkdownFile(filePath)) continue;
      const text = fs.readFileSync(filePath, "utf8");
      const blocks = extractMermaidBlocks(text);
      if (!blocks.length) continue;
      for (let i = 0; i < blocks.length; i++) {
        const code = String(blocks[i] || "").trim();
        total += 1;
        const diagramType = detectDiagramType(code);
        if (!diagramType) {
          // Diagram types the parser cannot handle yet are counted, not failed.
          skipped += 1;
          continue;
        }
        try {
          await parse(diagramType, code);
        } catch (err) {
          ok = false;
          failures += 1;
          console.error(`[mermaid] ${filePath} block=${i + 1} type=${diagramType}: ${errorMessage(err)}`);
        }
      }
    }
  }
  if (ok) {
    console.log(JSON.stringify({ ok: true, diagrams: total, skipped }, null, 2));
    return 0;
  }
  console.error(JSON.stringify({ ok: false, diagrams: total, failures, skipped }, null, 2));
  return 1;
}

/** Best-effort extraction of a human-readable message from a thrown value. */
function errorMessage(err) {
  return err && typeof err === "object" && "message" in err ? String(err.message) : String(err);
}
// ESM entry point: top-level await runs the scan and propagates its exit code.
process.exitCode = await main();