Sanitize DM reports and add Mermaid tooling

This commit is contained in:
danny 2025-12-24 13:13:48 +00:00
parent a6222083e6
commit a140b3787a
10 changed files with 516 additions and 427 deletions

5
.gitignore vendored
View file

@ -7,4 +7,7 @@ venv/
.vscode/
/dist/
/build/
node_modules/
npm-debug.log*
yarn-debug.log*
yarn-error.log*

View file

@ -103,6 +103,12 @@ This produces the “Sergio persona” artifacts needed for the DM agent:
Outputs are written with mode `600` and may contain sensitive DM content. Keep them out of git.
This repo includes **sanitized** example reports (no verbatim client DMs) under:
- `reports/socialmediatorr/`
Raw analysis artifacts (e.g., training pairs, rescued threads, template caches) should remain in a private working directory such as `/root/tmp/` and should not be committed.
### Analyze a raw Instagram export folder (recommended)
Optional: index first (lets you filter recency without scanning every thread):

174
package-lock.json generated Normal file
View file

@ -0,0 +1,174 @@
{
"name": "emo-social-insta-dm-agent-tools",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "emo-social-insta-dm-agent-tools",
"devDependencies": {
"@mermaid-js/parser": "^0.6.3"
}
},
"node_modules/@chevrotain/cst-dts-gen": {
"version": "11.0.3",
"resolved": "https://registry.npmjs.org/@chevrotain/cst-dts-gen/-/cst-dts-gen-11.0.3.tgz",
"integrity": "sha512-BvIKpRLeS/8UbfxXxgC33xOumsacaeCKAjAeLyOn7Pcp95HiRbrpl14S+9vaZLolnbssPIUuiUd8IvgkRyt6NQ==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"@chevrotain/gast": "11.0.3",
"@chevrotain/types": "11.0.3",
"lodash-es": "4.17.21"
}
},
"node_modules/@chevrotain/gast": {
"version": "11.0.3",
"resolved": "https://registry.npmjs.org/@chevrotain/gast/-/gast-11.0.3.tgz",
"integrity": "sha512-+qNfcoNk70PyS/uxmj3li5NiECO+2YKZZQMbmjTqRI3Qchu8Hig/Q9vgkHpI3alNjr7M+a2St5pw5w5F6NL5/Q==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"@chevrotain/types": "11.0.3",
"lodash-es": "4.17.21"
}
},
"node_modules/@chevrotain/regexp-to-ast": {
"version": "11.0.3",
"resolved": "https://registry.npmjs.org/@chevrotain/regexp-to-ast/-/regexp-to-ast-11.0.3.tgz",
"integrity": "sha512-1fMHaBZxLFvWI067AVbGJav1eRY7N8DDvYCTwGBiE/ytKBgP8azTdgyrKyWZ9Mfh09eHWb5PgTSO8wi7U824RA==",
"dev": true,
"license": "Apache-2.0"
},
"node_modules/@chevrotain/types": {
"version": "11.0.3",
"resolved": "https://registry.npmjs.org/@chevrotain/types/-/types-11.0.3.tgz",
"integrity": "sha512-gsiM3G8b58kZC2HaWR50gu6Y1440cHiJ+i3JUvcp/35JchYejb2+5MVeJK0iKThYpAa/P2PYFV4hoi44HD+aHQ==",
"dev": true,
"license": "Apache-2.0"
},
"node_modules/@chevrotain/utils": {
"version": "11.0.3",
"resolved": "https://registry.npmjs.org/@chevrotain/utils/-/utils-11.0.3.tgz",
"integrity": "sha512-YslZMgtJUyuMbZ+aKvfF3x1f5liK4mWNxghFRv7jqRR9C3R3fAOGTTKvxXDa2Y1s9zSbcpuO0cAxDYsc9SrXoQ==",
"dev": true,
"license": "Apache-2.0"
},
"node_modules/@mermaid-js/parser": {
"version": "0.6.3",
"resolved": "https://registry.npmjs.org/@mermaid-js/parser/-/parser-0.6.3.tgz",
"integrity": "sha512-lnjOhe7zyHjc+If7yT4zoedx2vo4sHaTmtkl1+or8BRTnCtDmcTpAjpzDSfCZrshM5bCoz0GyidzadJAH1xobA==",
"dev": true,
"license": "MIT",
"dependencies": {
"langium": "3.3.1"
}
},
"node_modules/chevrotain": {
"version": "11.0.3",
"resolved": "https://registry.npmjs.org/chevrotain/-/chevrotain-11.0.3.tgz",
"integrity": "sha512-ci2iJH6LeIkvP9eJW6gpueU8cnZhv85ELY8w8WiFtNjMHA5ad6pQLaJo9mEly/9qUyCpvqX8/POVUTf18/HFdw==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"@chevrotain/cst-dts-gen": "11.0.3",
"@chevrotain/gast": "11.0.3",
"@chevrotain/regexp-to-ast": "11.0.3",
"@chevrotain/types": "11.0.3",
"@chevrotain/utils": "11.0.3",
"lodash-es": "4.17.21"
}
},
"node_modules/chevrotain-allstar": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/chevrotain-allstar/-/chevrotain-allstar-0.3.1.tgz",
"integrity": "sha512-b7g+y9A0v4mxCW1qUhf3BSVPg+/NvGErk/dOkrDaHA0nQIQGAtrOjlX//9OQtRlSCy+x9rfB5N8yC71lH1nvMw==",
"dev": true,
"license": "MIT",
"dependencies": {
"lodash-es": "^4.17.21"
},
"peerDependencies": {
"chevrotain": "^11.0.0"
}
},
"node_modules/langium": {
"version": "3.3.1",
"resolved": "https://registry.npmjs.org/langium/-/langium-3.3.1.tgz",
"integrity": "sha512-QJv/h939gDpvT+9SiLVlY7tZC3xB2qK57v0J04Sh9wpMb6MP1q8gB21L3WIo8T5P1MSMg3Ep14L7KkDCFG3y4w==",
"dev": true,
"license": "MIT",
"dependencies": {
"chevrotain": "~11.0.3",
"chevrotain-allstar": "~0.3.0",
"vscode-languageserver": "~9.0.1",
"vscode-languageserver-textdocument": "~1.0.11",
"vscode-uri": "~3.0.8"
},
"engines": {
"node": ">=16.0.0"
}
},
"node_modules/lodash-es": {
"version": "4.17.21",
"resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.17.21.tgz",
"integrity": "sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw==",
"dev": true,
"license": "MIT"
},
"node_modules/vscode-jsonrpc": {
"version": "8.2.0",
"resolved": "https://registry.npmjs.org/vscode-jsonrpc/-/vscode-jsonrpc-8.2.0.tgz",
"integrity": "sha512-C+r0eKJUIfiDIfwJhria30+TYWPtuHJXHtI7J0YlOmKAo7ogxP20T0zxB7HZQIFhIyvoBPwWskjxrvAtfjyZfA==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=14.0.0"
}
},
"node_modules/vscode-languageserver": {
"version": "9.0.1",
"resolved": "https://registry.npmjs.org/vscode-languageserver/-/vscode-languageserver-9.0.1.tgz",
"integrity": "sha512-woByF3PDpkHFUreUa7Hos7+pUWdeWMXRd26+ZX2A8cFx6v/JPTtd4/uN0/jB6XQHYaOlHbio03NTHCqrgG5n7g==",
"dev": true,
"license": "MIT",
"dependencies": {
"vscode-languageserver-protocol": "3.17.5"
},
"bin": {
"installServerIntoExtension": "bin/installServerIntoExtension"
}
},
"node_modules/vscode-languageserver-protocol": {
"version": "3.17.5",
"resolved": "https://registry.npmjs.org/vscode-languageserver-protocol/-/vscode-languageserver-protocol-3.17.5.tgz",
"integrity": "sha512-mb1bvRJN8SVznADSGWM9u/b07H7Ecg0I3OgXDuLdn307rl/J3A9YD6/eYOssqhecL27hK1IPZAsaqh00i/Jljg==",
"dev": true,
"license": "MIT",
"dependencies": {
"vscode-jsonrpc": "8.2.0",
"vscode-languageserver-types": "3.17.5"
}
},
"node_modules/vscode-languageserver-textdocument": {
"version": "1.0.12",
"resolved": "https://registry.npmjs.org/vscode-languageserver-textdocument/-/vscode-languageserver-textdocument-1.0.12.tgz",
"integrity": "sha512-cxWNPesCnQCcMPeenjKKsOCKQZ/L6Tv19DTRIGuLWe32lyzWhihGVJ/rcckZXJxfdKCFvRLS3fpBIsV/ZGX4zA==",
"dev": true,
"license": "MIT"
},
"node_modules/vscode-languageserver-types": {
"version": "3.17.5",
"resolved": "https://registry.npmjs.org/vscode-languageserver-types/-/vscode-languageserver-types-3.17.5.tgz",
"integrity": "sha512-Ld1VelNuX9pdF39h2Hgaeb5hEZM2Z3jUrrMgWQAu82jMtZp7p3vJT3BzToKtZI7NgQssZje5o0zryOrhQvzQAg==",
"dev": true,
"license": "MIT"
},
"node_modules/vscode-uri": {
"version": "3.0.8",
"resolved": "https://registry.npmjs.org/vscode-uri/-/vscode-uri-3.0.8.tgz",
"integrity": "sha512-AyFQ0EVmsOZOlAnxoFOGOq1SQDWAB7C6aqMGS23svWAllfOaxbuFvcT8D1i8z3Gyn8fraVeZNNmN6e9bxxXkKw==",
"dev": true,
"license": "MIT"
}
}
}

11
package.json Normal file
View file

@ -0,0 +1,11 @@
{
"name": "emo-social-insta-dm-agent-tools",
"private": true,
"type": "module",
"devDependencies": {
"@mermaid-js/parser": "^0.6.3"
},
"scripts": {
"verify:mermaid": "node tools/verify_mermaid.mjs"
}
}

View file

@ -1,163 +1,42 @@
# Socialmediatorr Instagram DM History — Human Readable Report (English)
# Instagram DM History — Short Report (English)
- Generated: `2025-12-24T02:28:34+00:00`
- Owner name used: `Sergio de Vocht`
- Inbox: `@socialmediatorr`
## 1) What This Dataset Represents
## What This Is
This is an all-time audit of Instagram DM conversations for `@socialmediatorr`, focused on extracting repeatable sales + support behavior so an AI agent can reply in Sergio's style.
The analysis treats the account as a hybrid system: frequent repeated templates (likely automation/scripts) plus lower-frequency custom replies (human Sergio).
This is a short, plain-English summary of the DM history scan.
It avoids quoting private messages and it avoids storing personal identities.
## 2) High-Level Metrics (All-Time)
## Key Numbers
- Conversations analyzed: **10,061**
- Bot-only conversations: **1,883** (18.7%)
- Human-intervened conversations: **8,153** (81.0%)
- Conversion (intent signals): **1,923** (19.1%)
- Conversion (confirmed signals): **55** (0.5%)
- Conversations that stayed template-only: **1,883** (18.7%)
- Conversations that included custom replies: **8,153** (81.0%)
- Buying/booking signals (weak): **1,923** (19.1%)
- Buying/booking signals (strong): **55** (0.5%)
Notes on conversion: this uses heuristics (keywords + payment/link mentions). It is directionally useful for ranking scripts, but it is not a ground-truth revenue ledger.
Buying/booking signals are detected from text patterns (they are not a payment ledger).
## 3) Sergio Persona (From Manual/Hybrid Replies)
## What You Need to Know
- Typical reply length: median **60.0** chars (p90 **67.0**)
- Questions: **2.4%** | Exclamations: **1.7%** | Emoji: **0.0%**
- Language guess (manual replies): en=8043, es=423, unknown=224
The fastest improvements come from standardizing answers to repeated questions and sending them in the right time blocks.
For the full deep report (CET timing, day-of-week patterns, Top 20 questions, and concrete actions), read:
Practical implication for an agent: short, direct replies; minimal punctuation; bilingual capability; low/no emoji usage.
- `reports/socialmediatorr/dm_history_report_en_detailed.md`
## 4) Bot vs Human Segmentation (What It Means)
## Useful Inventory (Safe Counts Only)
- **[BOT]** = outgoing message template repeated frequently (>= configured threshold).
- **[MANUAL]** = outgoing message that is rare/unique (<= configured threshold).
- **[HYBRID]** = messages that look like a bot template but with manual edits (prefix match/similarity).
- Total outgoing templates detected: **8,550**
- High-frequency repeat templates: **24**
- “Rescue” events detected: **7**
- Training pairs (user → reply) available: **524**
This separation is the foundation for: (1) extracting safe reusable scripts, and (2) extracting human-only replies as training data for a RAG or fine-tune.
## What You Do Not Need to Know
## 5) Top Detected Script Templates (Canonicalized)
- BOT #1: sent **2495**×`crees que es necesario hoy en dã a llevar a cabo este desarrollo colectivo`
- BOT #2: sent **2483**×`perfecto aquã no hablamos de â mejorar solo a uno mismoâ sino de algo mucho mã s profundo estar bien con los demã s para poder estar bien contigo mism`
- BOT #3: sent **2483**×`te lo dejo por aquã dame un minuto`
- BOT #4: sent **2483**×`me gustarã a saber tu opiniã³n`
- BOT #5: sent **1878**×`me alegro de que quieras seguir aprendiendo ð ª te dejo por aquã el ebook â <NUM> conceptos de desarrollo personal que te estã n quitando paz`
- BOT #6: sent **1878**×`no es para que lo leas en modo teorã a sino para que puedas detectar ideas que llevas tiempo aplicando y que sin darte cuenta estã n influyendo en tus relacione`
- BOT #7: sent **706**×`gracias por ese feedback ð`
- BOT #8: sent **706**×`como agradecimiento por seguirme quiero regalarte un video exclusivo que te ayude a empezar este cambio dime â dã³nde sientes que hay mã s conflicto ãºltimament`
- BOT #9: sent **680**×`you sent a private reply to a comment on your instagram post`
- BOT #10: sent **469**×`por cierto`
## 6) Human Reply Library (Rare/Manual Examples, Canonicalized)
- MANUAL-ish #1: seen **10**×`quã bonito leer eso a veces entender las palabras abre puertas nuevas â sientes que en tu entorno hay algo que te gustarã a armonizar mã s`
- MANUAL-ish #2: seen **7**×`buenas aquã sergio ð gracias por responder he preparado un video con muchã simo valor para ayudarte a acabar con esos desbordes y vivir en paz solo tienes que c`
- MANUAL-ish #3: seen **5**×`hola buenas como estas ð espero que estã s bien me gustarã a saber que es lo q te ha echo estar aquã y querer saber mã s sobre nuestras formaciã³n`
- MANUAL-ish #4: seen **5**×`y si pudieras resolver esto cã³mo crees que cambiarã a tu forma de relacionarte o sentirte`
- MANUAL-ish #5: seen **5**×`para conocerte un poquito mã s que te gustarã a conseguir con emosocial cual es tu mayor desafã o actualmente dentro de tus relaciones`
- MANUAL-ish #6: seen **4**×`okey te entiendo perfectamente ð segãºn lo que me comentas creo que esta lista de videos de youtube te va a venir genial para empezar a entender las bases del c`
- MANUAL-ish #7: seen **4**×`buenas aquã sergio ð gracias por responder he preparado un video con muchã simo valor para ayudarte a acabar con esos desbordes y vivir en paz solo tienes que c`
- MANUAL-ish #8: seen **3**×`hola buenas como estas espero que bien cuã ntame que te parece el contenido que estamos ofreciendo por whatsapp te leoð ð`
## 7) Bot Template Performance (Reply/Conversion Heuristics)
These come from `bot_performance_audit.csv` and are computed per canonical bot template.
### Most-used bot templates (by volume)
- sent=2495 reply_rate=0.376 intent_rate=0.0766 confirmed_rate=0.012 — `crees que es necesario hoy en dã a llevar a cabo este desarrollo colectivo`
- sent=2483 reply_rate=0.0334 intent_rate=0.0769 confirmed_rate=0.0121 — `perfecto aquã no hablamos de â mejorar solo a uno mismoâ sino de algo mucho mã s profundo estar bien con los demã s para poder estar bien co`
- sent=2483 reply_rate=0.1188 intent_rate=0.0769 confirmed_rate=0.0121 — `te lo dejo por aquã dame un minuto`
- sent=2483 reply_rate=0.0028 intent_rate=0.0769 confirmed_rate=0.0121 — `me gustarã a saber tu opiniã³n`
- sent=1878 reply_rate=0.0 intent_rate=0.0 confirmed_rate=0.0005 — `me alegro de que quieras seguir aprendiendo ð ª te dejo por aquã el ebook â <NUM> conceptos de desarrollo personal que te estã n quitando pa`
- sent=1878 reply_rate=0.1768 intent_rate=0.0 confirmed_rate=0.0005 — `no es para que lo leas en modo teorã a sino para que puedas detectar ideas que llevas tiempo aplicando y que sin darte cuenta estã n influye`
- sent=706 reply_rate=0.0042 intent_rate=0.1048 confirmed_rate=0.017 — `gracias por ese feedback ð`
- sent=706 reply_rate=0.8187 intent_rate=0.1048 confirmed_rate=0.017 — `como agradecimiento por seguirme quiero regalarte un video exclusivo que te ayude a empezar este cambio dime â dã³nde sientes que hay mã s c`
### Best reply-rate bot templates
- reply_rate=0.8187 sent=706 — `como agradecimiento por seguirme quiero regalarte un video exclusivo que te ayude a empezar este cambio dime â dã³nde sientes que hay mã s c`
- reply_rate=0.7143 sent=98 — `pudiste entrar correctamente`
- reply_rate=0.7022 sent=178 — `por favor toca una de las siguientes opciones ð`
- reply_rate=0.4701 sent=134 — `pudiste verlo`
- reply_rate=0.4602 sent=176 — `que te pareciã³ ese diccionario hay alguna palabra que sueles utilizar y no te habã as dado cuenta`
- reply_rate=0.376 sent=2495 — `crees que es necesario hoy en dã a llevar a cabo este desarrollo colectivo`
- reply_rate=0.3458 sent=240 — `gracias por tu sinceridad ð`
- reply_rate=0.3291 sent=158 — `te dejo este video donde explico por quã las relaciones de pareja entran en conflicto aunque haya amor`
### Worst reply-rate bot templates
- reply_rate=0.0 sent=1878 — `me alegro de que quieras seguir aprendiendo ð ª te dejo por aquã el ebook â <NUM> conceptos de desarrollo personal que te estã n quitando pa`
- reply_rate=0.0 sent=337 — `enhorabuena por querer dar ese cambio estã s a un paso de transformar tu relaciã³n en solo <NUM> dã as te invito a un taller exclusivo donde`
- reply_rate=0.0 sent=158 — `gracias por compartirlo â ï`
- reply_rate=0.0 sent=131 — `entiendo perfectamente ð`
- reply_rate=0.0 sent=54 — `this account can t receive your message because they don t allow new message requests from everyone`
- reply_rate=0.0028 sent=2483 — `me gustarã a saber tu opiniã³n`
- reply_rate=0.0042 sent=706 — `gracias por ese feedback ð`
- reply_rate=0.0334 sent=2483 — `perfecto aquã no hablamos de â mejorar solo a uno mismoâ sino de algo mucho mã s profundo estar bien con los demã s para poder estar bien co`
## 8) Objections → Best Sergio Replies (Playbook)
### price
- (1) Ey Alex que tal
- (1) Qué bonito leer eso. A veces entender las palabras abre puertas nuevas. ¿Sientes que en tu entorno hay algo que te gustaría armonizar más?
- (1) Y que es lo que te impide dar ese cambio? Te veo con mucha seguridad
### time
- (1) Brutal esto que dices
- (1) No es una herida ELA! Apego que no te dieron tus padres es solo una parte del espectro, necesitamos validación del mundo y de forma constante, no es una herida del pasado es algo que falta darnos en el presente.
- (1) Vaya, suena bastante frustrante el hecho de querer "bajar esa guardia", y sentir que cuando lo haces, todo cambia
### trust
- (2) Hola Dani, gracias por el mensaje bonito, de verdad. Me alegra mucho saber que el contenido te está ayudando a mirar las cosas desde otro punto de vista
- (2) Qué bonito leer eso. A veces entender las palabras abre puertas nuevas. ¿Sientes que en tu entorno hay algo que te gustaría armonizar más?
- (2) En la plataforma no sale por ningún lugar, y normalmente siempre llegan 2 emails, 1 de confirmación de pago y otro de bienvenida
## 9) Rescue / Save Logic (Human Intervention After Silence/Negativity)
- Rescue events detected (heuristic): **7**
A “rescue” is when a manual/hybrid owner message follows either (a) a user negative signal, or (b) >24h silence after a bot message, and the thread later shows a confirmed conversion signal.
## 10) Product / Offer Evolution (Eras)
This is inferred from mentions of pricing/currency + offer terms (e.g., call/audit/coaching) and summarized quarterly.
Recent quarters (top extracted offer signals):
- stripe(1)
- book(1912); ebook(1912); call(8); calendly(7); coaching(2); stripe(2); pdf(2); paypal(1)
## 11) Charts
- Bot fatigue (weekly reply rate to the dominant bot script): `bot_fatigue_chart.png`
![](bot_fatigue_chart.png)
- Editorial timeline (top bot scripts vs conversions): `editorial_timeline.png`
![](editorial_timeline.png)
## 12) What To Build From This (Agent Requirements)
### Core behavior
- Start with top bot templates for predictable openers and FAQ-style flows.
- Switch to Sergio-style manual patterns on objections, negotiation, or when conversation stalls.
- Use a rescue cadence (time-based triggers) after silence.
### Data products to drive the agent
- Training pairs (manual-only, converted threads): `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/training_pairs.jsonl` (rows: ~524)
- Objection handlers: `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/objection_handlers.json`
- Rescue playbook: `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/rescue_playbook.json`
- Script templates + editorial drift: `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/top_outgoing_templates.json`
### Safety boundaries (recommended)
- Never request or store passwords/2FA codes.
- Avoid medical/legal/financial advice; redirect to a call or a human.
- If a user asks to move off-platform, follow Sergio's historical policy and business rules.
## 13) What We Do NOT Need To Know (Ignore / Do Not Store)
- Exact client identities (names, handles, phone numbers, emails) unless required for operational routing.
- Media attachments (photos/videos/audio) for persona cloning; they add storage cost and privacy risk.
- Full verbatim message dumps for every thread; for RAG you only need high-quality pairs and playbook snippets.
- Individual one-off edge cases that never repeat (unless they represent a safety boundary).
- Internal Meta export folder structure details beyond `messages/inbox/**/message*.json`.
## 14) Caveats / Gaps
- The export does not reliably label ManyChat vs Human; bot/human is inferred by repetition and similarity.
- Conversion is heuristic; integrate Stripe/Calendly/CRM events if you want ground-truth attribution.
- Language detection is heuristic; improve it if you need precise bilingual routing.
Do not store or copy these into an automation system unless you have a clear operational reason:
- Names, handles, phone numbers, emails.
- Full conversation transcripts for every thread.
- Photos, videos, audio, and other attachments.
- One-off edge cases that never repeat.

View file

@ -1,25 +1,22 @@
# Socialmediatorr Instagram DM History : Plain-English Deep Report
# Instagram DM History — Plain-English Deep Report
## DM History Deep Report
## What This Is
**Subject:** Instagram direct messages for `@socialmediatorr`
**Version:** v1.0 (STYLE BIBLE EN 3.0GM)
**Inbox:** `@socialmediatorr`
**Date:** 2025-12-24
**Status:** REVIEW REQUIRED
**Citation:** `if://report/socialmediatorr/instagram/dm-history/`
**Author:** Danny Stocker | InfraFabric Research
**Time zone used:** CET
### How This Report Was Made
> This is an automated count of patterns. It is not a therapy note and it is not a sales ledger.
> This is a count of patterns. It is not a therapy note and it is not a sales ledger.
This document was generated by reading an Instagram data export and counting repeat patterns over time. It avoids quoting private client messages and it avoids storing personal identities.
---
**Context:** This inbox contains a high-volume message-and-reply system over 429 days.
**Context:** This inbox contains message history over 429 days.
> Your messaging system is working as a volume engine. The weak point is consistency at the moments where people ask to buy or book.
> The system works at scale. The weak point is the “next step” moments: when people ask what to do, what it costs, or where to get it.
The purpose of this report is practical: define what to keep, what to remove, and what to automate safely—without damaging trust.
@ -35,7 +32,7 @@ Across the observed window, you sent a very large number of messages and you rec
| Total messages | 54,069 | Instagram export |
| Messages you sent | 43,607 | Instagram export |
| Messages people sent you | 10,462 | Instagram export |
| Messages that look like a question or a request | 2,713 | Instagram export |
| Messages that look like a question or a request | 2,715 | Instagram export |
| System messages about new followers (auto text in the inbox) | 8,081 | Instagram export |
### What You Need to Know (In Plain English)
@ -67,7 +64,7 @@ To avoid guesswork, we start with 3-month blocks (a simple way to smooth noise),
| 2025 Jan-Mar | 21 | 0 | 0 |
| 2025 Apr-Jun | 92 | 97 | 15 |
| 2025 Jul-Sep | 623 | 882 | 89 |
| 2025 Oct-Dec | 9,712 | 42,628 | 2,609 |
| 2025 Oct-Dec | 9,712 | 42,628 | 2,611 |
Same data as charts:
@ -112,8 +109,8 @@ This month-by-month table is the clearest view of how the inbox changed over tim
| 2025-08 | 193 | 230 | 28 | 50.0% |
| 2025-09 | 284 | 330 | 24 | 20.8% |
| 2025-10 | 787 | 1,190 | 64 | 17.2% |
| 2025-11 | 854 | 2,194 | 149 | 46.3% |
| 2025-12 | 8,071 | 39,244 | 2,396 | 89.6% |
| 2025-11 | 854 | 2,194 | 150 | 46.7% |
| 2025-12 | 8,071 | 39,244 | 2,397 | 89.7% |
The busiest month was **2025-12** with **47,315** messages total (87.5% of everything in this export). That single month dominates the shape of the data.
@ -126,7 +123,7 @@ Use this to time follow-ups and first messages. Do not spread effort evenly acro
| Day of week | Messages from people | Messages you sent | Questions/requests |
|---|---:|---:|---:|
| Monday | 1,600 | 8,359 | 131 |
| Tuesday | 1,939 | 9,654 | 192 |
| Tuesday | 1,939 | 9,654 | 194 |
| Wednesday | 1,282 | 5,554 | 159 |
| Thursday | 2,261 | 6,908 | 1,268 |
| Friday | 1,705 | 5,733 | 803 |
@ -184,7 +181,7 @@ One caution: “fast replies” are often repeat messages. This section shows ov
| Typical time to reply to questions/requests | 2 seconds | Instagram export |
| Slow end for questions/requests (90% are faster) | 4 seconds | Instagram export |
| Messages from people answered within 48 hours | 7,467 (71.4%) | Instagram export |
| Questions/requests answered within 48 hours | 2,278 (84.0%) | Instagram export |
| Questions/requests answered within 48 hours | 2,280 (84.0%) | Instagram export |
Breakdown by message type (repeat messages vs custom messages):
@ -242,11 +239,11 @@ This list is grouped by meaning (not by exact wording). It includes very short r
| Rank | Topic (plain English) | Count | Share of all questions/requests |
|---:|---|---:|---:|
| 1 | Just one word: book | 1,857 | 68.4% |
| 2 | What is this? | 206 | 7.6% |
| 3 | Can you send the video? | 191 | 7.0% |
| 4 | Other question | 120 | 4.4% |
| 2 | What is this? | 203 | 7.5% |
| 3 | Can you send the video? | 189 | 7.0% |
| 4 | Other question | 118 | 4.3% |
| 5 | Can you help me? | 74 | 2.7% |
| 6 | Can you send the link? | 61 | 2.2% |
| 6 | Can you send the link? | 70 | 2.6% |
| 7 | What does it cost? | 53 | 2.0% |
| 8 | Is this therapy? | 44 | 1.6% |
| 9 | Where do I get the book? | 36 | 1.3% |
@ -261,12 +258,12 @@ This list is grouped by meaning (not by exact wording). It includes very short r
| 18 | Can I get a refund? | 1 | 0.0% |
| 19 | How long does it take? | 1 | 0.0% |
In plain terms: **1,893** of **2,713** questions/requests are about the book (69.8%).
In plain terms: **1,893** of **2,715** questions/requests are about the book (69.7%).
```mermaid
pie title Questions/Requests: Book vs Everything Else
"Book" : 1893
"Everything else" : 820
"Everything else" : 822
```
### Content Patterns (What You Mention When You Sell)

View file

@ -1,10 +1,8 @@
from __future__ import annotations
import argparse
import csv
import json
import os
import statistics
from dataclasses import dataclass
from pathlib import Path
from typing import Any
@ -21,11 +19,6 @@ def _load_json(path: Path) -> dict[str, Any]:
return json.loads(path.read_text(encoding="utf-8", errors="replace"))
def _read_csv(path: Path) -> list[dict[str, str]]:
with path.open("r", encoding="utf-8", newline="") as f:
return list(csv.DictReader(f))
def _count_jsonl(path: Path, *, max_lines: int = 5_000_000) -> int:
n = 0
with path.open("r", encoding="utf-8", errors="replace") as f:
@ -36,284 +29,115 @@ def _count_jsonl(path: Path, *, max_lines: int = 5_000_000) -> int:
return n
def _pct(x: float) -> str:
return f"{x*100:.1f}%"
def _pct(num: int, den: int) -> str:
return "n/a" if den <= 0 else f"{(num/den)*100:.1f}%"
@dataclass(frozen=True)
class ReportInputs:
    """Immutable bundle of the artifact paths one report-generation run reads.

    All paths are expected to live inside a single analysis output directory;
    whether each file actually exists is checked by the caller, not here.
    """

    summary: Path         # summary.json — top-level metrics for the run
    templates: Path       # top_outgoing_templates.json — canonical outgoing templates
    bot_audit: Path       # bot_performance_audit.csv — per-template reply/conversion rates
    objections: Path      # objection_handlers.json — replies grouped by objection category
    rescue: Path          # rescue_playbook.json — detected rescue events
    eras: Path            # sergio_eras.csv — quarterly offer-signal summary
    training_pairs: Path  # training_pairs.jsonl — user -> reply training pairs
    fatigue_png: Path     # bot_fatigue_chart.png — weekly reply-rate chart
    editorial_png: Path   # editorial_timeline.png — scripts-vs-conversions chart
def _resolve_inputs(analysis_dir: Path) -> ReportInputs:
    """Map an analysis output directory onto its well-known artifact filenames.

    Purely constructs paths; no filesystem access or existence checks happen here.
    """
    base = analysis_dir
    return ReportInputs(
        summary=base / "summary.json",
        templates=base / "top_outgoing_templates.json",
        bot_audit=base / "bot_performance_audit.csv",
        objections=base / "objection_handlers.json",
        rescue=base / "rescue_playbook.json",
        eras=base / "sergio_eras.csv",
        training_pairs=base / "training_pairs.jsonl",
        fatigue_png=base / "bot_fatigue_chart.png",
        editorial_png=base / "editorial_timeline.png",
    )
def generate_report(*, analysis_dir: Path, out_path: Path) -> Path:
inp = _resolve_inputs(analysis_dir)
for p in inp.__dict__.values():
if not Path(p).exists():
raise FileNotFoundError(str(p))
if not inp.summary.exists():
raise FileNotFoundError(str(inp.summary))
summary = _load_json(inp.summary)
templates = _load_json(inp.templates)
objections = _load_json(inp.objections)
rescues = _load_json(inp.rescue)
bot_audit = _read_csv(inp.bot_audit)
owner = summary.get("owner_name") or "Unknown"
conv = summary.get("conversations") or {}
conv_total = int(conv.get("total") or 0)
bot_only = int(conv.get("bot_only") or 0)
human = int(conv.get("human_intervened") or 0)
conversions = summary.get("conversions") or {}
conv_intent = int(conversions.get("intent") or 0)
conv_confirmed = int(conversions.get("confirmed") or 0)
template_only = int(conv.get("bot_only") or 0)
custom_replies = int(conv.get("human_intervened") or 0)
bot_only_rate = (bot_only / conv_total) if conv_total else 0.0
human_rate = (human / conv_total) if conv_total else 0.0
intent_rate = (conv_intent / conv_total) if conv_total else 0.0
confirmed_rate = (conv_confirmed / conv_total) if conv_total else 0.0
buying = summary.get("conversions") or {}
buying_weak = int(buying.get("intent") or 0)
buying_strong = int(buying.get("confirmed") or 0)
manual_style = summary.get("manual_style") or {}
median_len = manual_style.get("median_len_chars")
p90_len = manual_style.get("p90_len_chars")
question_rate = float(manual_style.get("question_rate") or 0.0)
exclaim_rate = float(manual_style.get("exclaim_rate") or 0.0)
emoji_rate = float(manual_style.get("emoji_rate") or 0.0)
lang_guess = manual_style.get("lang_guess") or {}
templates_total = None
templates_repeat = None
if inp.templates.exists():
t = _load_json(inp.templates)
templates_total = int(t.get("templates_total") or 0)
templates_repeat = int(t.get("bot_templates") or 0)
# Templates: prefer canonical strings (safe-ish) and avoid raw samples.
top_templates = templates.get("top_templates") or []
top_bot = [t for t in top_templates if isinstance(t, dict) and t.get("label_hint") == "bot"]
top_manual = [t for t in top_templates if isinstance(t, dict) and t.get("label_hint") == "manual"]
# Bot audit: best/worst by reply_rate.
def fnum(v: str | None) -> float:
rescue_count = None
if inp.rescue.exists():
try:
return float(v or 0)
rescue = _load_json(inp.rescue)
rescue_count = len(rescue) if isinstance(rescue, list) else 0
except Exception:
return 0.0
rescue_count = None
bot_audit_sorted = sorted(bot_audit, key=lambda r: fnum(r.get("sent")), reverse=True)
top_audit = bot_audit_sorted[:10]
best_reply = sorted(bot_audit, key=lambda r: fnum(r.get("reply_rate")), reverse=True)[:10]
worst_reply = sorted(bot_audit, key=lambda r: fnum(r.get("reply_rate")))[:10]
pairs_count = _count_jsonl(inp.training_pairs, max_lines=2_000_000) if inp.training_pairs.exists() else None
# Objections: most common replies per category.
objection_blocks: list[str] = []
if isinstance(objections, dict):
for cat in ("price", "time", "trust", "stop"):
replies = objections.get(cat) or []
if not isinstance(replies, list) or not replies:
continue
top3 = []
for r in replies[:3]:
if not isinstance(r, dict):
continue
top3.append(f"- ({r.get('count')}) {r.get('reply')}")
if top3:
objection_blocks.append(f"### {cat}\n" + "\n".join(top3))
generated_at = summary.get("generated_at") if isinstance(summary.get("generated_at"), str) else None
rescue_count = len(rescues) if isinstance(rescues, list) else 0
pairs_count = _count_jsonl(inp.training_pairs, max_lines=2_000_000)
# Era summary: simple high-level notes.
eras_rows = _read_csv(inp.eras)
era_recent = eras_rows[-6:] if len(eras_rows) > 6 else eras_rows
era_offer_terms: list[str] = []
for row in era_recent:
offers = (row.get("top_offers") or "").strip()
if offers:
era_offer_terms.append(offers)
# A few derived notes.
lang_line = ", ".join(f"{k}={v}" for k, v in lang_guess.items())
# Summarize bot fatigue trend from image existence only (analysis already made it).
report = []
report.append("# Socialmediatorr Instagram DM History — Human Readable Report (English)")
report: list[str] = []
report.append("# Instagram DM History — Short Report (English)")
report.append("")
report.append(f"- Generated: `{summary.get('generated_at')}`")
report.append(f"- Owner name used: `{owner}`")
if generated_at:
report.append(f"- Generated: `{generated_at}`")
report.append("- Inbox: `@socialmediatorr`")
report.append("")
report.append("## 1) What This Dataset Represents")
report.append("## What This Is")
report.append("")
report.append(
"This is an all-time audit of Instagram DM conversations for `@socialmediatorr`, focused on extracting repeatable sales + support behavior so an AI agent can reply in Sergios style."
)
report.append(
"The analysis treats the account as a hybrid system: frequent repeated templates (likely automation/scripts) plus lower-frequency custom replies (human Sergio)."
)
report.append("This is a short, plain-English summary of the DM history scan.")
report.append("It avoids quoting private messages and it avoids storing personal identities.")
report.append("")
report.append("## 2) High-Level Metrics (All-Time)")
report.append("## Key Numbers")
report.append("")
report.append(f"- Conversations analyzed: **{conv_total:,}**")
report.append(f"- Bot-only conversations: **{bot_only:,}** ({_pct(bot_only_rate)})")
report.append(f"- Human-intervened conversations: **{human:,}** ({_pct(human_rate)})")
report.append(f"- Conversion (intent signals): **{conv_intent:,}** ({_pct(intent_rate)})")
report.append(f"- Conversion (confirmed signals): **{conv_confirmed:,}** ({_pct(confirmed_rate)})")
report.append(f"- Conversations that stayed template-only: **{template_only:,}** ({_pct(template_only, conv_total)})")
report.append(f"- Conversations that included custom replies: **{custom_replies:,}** ({_pct(custom_replies, conv_total)})")
report.append(f"- Buying/booking signals (weak): **{buying_weak:,}** ({_pct(buying_weak, conv_total)})")
report.append(f"- Buying/booking signals (strong): **{buying_strong:,}** ({_pct(buying_strong, conv_total)})")
report.append("")
report.append(
"Notes on conversion: this uses heuristics (keywords + payment/link mentions). It is directionally useful for ranking scripts, but it is not a ground-truth revenue ledger."
)
report.append("Buying/booking signals are detected from text patterns (they are not a payment ledger).")
report.append("")
report.append("## 3) Sergio Persona (From Manual/Hybrid Replies)")
report.append("## What You Need to Know")
report.append("")
report.append(f"- Typical reply length: median **{median_len}** chars (p90 **{p90_len}**)")
report.append(f"- Questions: **{_pct(question_rate)}** | Exclamations: **{_pct(exclaim_rate)}** | Emoji: **{_pct(emoji_rate)}**")
report.append(f"- Language guess (manual replies): {lang_line or 'n/a'}")
report.append("The fastest improvements come from standardizing answers to repeated questions and sending them in the right time blocks.")
report.append("For the full deep report (CET timing, day-of-week patterns, Top 20 questions, and concrete actions), read:")
report.append("")
report.append("Practical implication for an agent: short, direct replies; minimal punctuation; bilingual capability; low/no emoji usage.")
report.append("- `reports/socialmediatorr/dm_history_report_en_detailed.md`")
report.append("")
report.append("## 4) Bot vs Human Segmentation (What It Means)")
report.append("## Useful Inventory (Safe Counts Only)")
report.append("")
report.append(
"- **[BOT]** = outgoing message template repeated frequently (>= configured threshold).\n"
"- **[MANUAL]** = outgoing message that is rare/unique (<= configured threshold).\n"
"- **[HYBRID]** = messages that look like a bot template but with manual edits (prefix match/similarity)."
)
report.append("")
report.append(
"This separation is the foundation for: (1) extracting safe reusable scripts, and (2) extracting human-only replies as training data for a RAG or fine-tune."
)
if templates_total is not None and templates_repeat is not None:
report.append(f"- Total outgoing templates detected: **{templates_total:,}**")
report.append(f"- High-frequency repeat templates: **{templates_repeat:,}**")
if rescue_count is not None:
report.append(f"- “Rescue” events detected: **{rescue_count:,}**")
if pairs_count is not None:
report.append(f"- Training pairs (user → reply) available: **{pairs_count:,}**")
if templates_total is None and rescue_count is None and pairs_count is None:
report.append("- (No additional artifacts were found next to `summary.json`.)")
report.append("")
report.append("## 5) Top Detected Script Templates (Canonicalized)")
report.append("## What You Do Not Need to Know")
report.append("")
if top_bot:
for i, t in enumerate(top_bot[:10], 1):
canon = (t.get("canonical") or "").strip()
count = int(t.get("count") or 0)
report.append(f"- BOT #{i}: sent **{count}**× — `{canon[:160]}`")
else:
report.append("- (No high-frequency bot templates detected with current thresholds.)")
report.append("")
report.append("## 6) Human Reply Library (Rare/Manual Examples, Canonicalized)")
report.append("")
if top_manual:
for i, t in enumerate(top_manual[:10], 1):
canon = (t.get("canonical") or "").strip()
count = int(t.get("count") or 0)
report.append(f"- MANUAL-ish #{i}: seen **{count}**× — `{canon[:160]}`")
else:
report.append("- (No low-frequency manual templates included in the cached top list.)")
report.append("")
report.append("## 7) Bot Template Performance (Reply/Conversion Heuristics)")
report.append("")
report.append("These come from `bot_performance_audit.csv` and are computed per canonical bot template.")
report.append("")
if top_audit:
report.append("### Most-used bot templates (by volume)")
for r in top_audit[:8]:
report.append(
f"- sent={r.get('sent')} reply_rate={r.get('reply_rate')} intent_rate={r.get('conversion_intent_rate')} confirmed_rate={r.get('conversion_confirmed_rate')} — `{(r.get('canonical_template') or '')[:140]}`"
)
report.append("")
if best_reply:
report.append("### Best reply-rate bot templates")
for r in best_reply[:8]:
report.append(f"- reply_rate={r.get('reply_rate')} sent={r.get('sent')} — `{(r.get('canonical_template') or '')[:140]}`")
report.append("")
if worst_reply:
report.append("### Worst reply-rate bot templates")
for r in worst_reply[:8]:
report.append(f"- reply_rate={r.get('reply_rate')} sent={r.get('sent')} — `{(r.get('canonical_template') or '')[:140]}`")
report.append("")
report.append("## 8) Objections → Best Sergio Replies (Playbook)")
report.append("")
if objection_blocks:
report.extend(objection_blocks)
else:
report.append("- No objection handlers detected with current keyword rules.")
report.append("")
report.append("## 9) Rescue / Save Logic (Human Intervention After Silence/Negativity)")
report.append("")
report.append(f"- Rescue events detected (heuristic): **{rescue_count:,}**")
report.append(
"A “rescue” is when a manual/hybrid owner message follows either (a) a user negative signal, or (b) >24h silence after a bot message, and the thread later shows a confirmed conversion signal."
)
report.append("")
report.append("## 10) Product / Offer Evolution (Eras)")
report.append("")
report.append(
"This is inferred from mentions of pricing/currency + offer terms (e.g., call/audit/coaching) and summarized quarterly."
)
report.append("")
if era_offer_terms:
report.append("Recent quarters (top extracted offer signals):")
for line in era_offer_terms:
report.append(f"- {line}")
else:
report.append("- No offer signals detected in the most recent quarters with current extraction rules.")
report.append("")
report.append("## 11) Charts")
report.append("")
report.append(f"- Bot fatigue (weekly reply rate to the dominant bot script): `{inp.fatigue_png}`")
report.append(f"- Editorial timeline (top bot scripts vs conversions): `{inp.editorial_png}`")
report.append("")
report.append("## 12) What To Build From This (Agent Requirements)")
report.append("")
report.append("### Core behavior")
report.append("- Start with top bot templates for predictable openers and FAQ-style flows.")
report.append("- Switch to Sergio-style manual patterns on objections, negotiation, or when conversation stalls.")
report.append("- Use a rescue cadence (time-based triggers) after silence.")
report.append("")
report.append("### Data products to drive the agent")
report.append(f"- Training pairs (manual-only, converted threads): `{inp.training_pairs}` (rows: ~{pairs_count:,})")
report.append(f"- Objection handlers: `{inp.objections}`")
report.append(f"- Rescue playbook: `{inp.rescue}`")
report.append(f"- Script templates + editorial drift: `{inp.templates}`")
report.append("")
report.append("### Safety boundaries (recommended)")
report.append("- Never request or store passwords/2FA codes.")
report.append("- Avoid medical/legal/financial advice; redirect to a call or a human.")
report.append("- If user asks to move off-platform, follow Sergios historical policy and business rules.")
report.append("")
report.append("## 13) What We Do NOT Need To Know (Ignore / Do Not Store)")
report.append("")
report.append("- Exact client identities (names, handles, phone numbers, emails) unless required for operational routing.")
report.append("- Media attachments (photos/videos/audio) for persona cloning; they add storage cost and privacy risk.")
report.append("- Full verbatim message dumps for every thread; for RAG you only need high-quality pairs and playbook snippets.")
report.append("- Individual one-off edge cases that never repeat (unless they represent a safety boundary).")
report.append("- Internal Meta export folder structure details beyond `messages/inbox/**/message*.json`.")
report.append("")
report.append("## 14) Caveats / Gaps")
report.append("")
report.append("- The export does not reliably label ManyChat vs Human; bot/human is inferred by repetition and similarity.")
report.append("- Conversion is heuristic; integrate Stripe/Calendly/CRM events if you want ground-truth attribution.")
report.append("- Language detection is heuristic; improve it if you need precise bilingual routing.")
report.append("Do not store or copy these into an automation system unless you have a clear operational reason:")
report.append("- Names, handles, phone numbers, emails.")
report.append("- Full conversation transcripts for every thread.")
report.append("- Photos, videos, audio, and other attachments.")
report.append("- One-off edge cases that never repeat.")
report.append("")
out_path.parent.mkdir(parents=True, exist_ok=True)
@ -323,19 +147,20 @@ def generate_report(*, analysis_dir: Path, out_path: Path) -> Path:
def main(argv: list[str] | None = None) -> int:
ap = argparse.ArgumentParser(description="Generate a human-readable English report from analyze_instagram_export outputs.")
ap.add_argument("--analysis-dir", required=True, help="directory produced by analyze_instagram_export (contains summary.json)")
ap.add_argument("--out", default=None, help="output markdown path (default: <analysis-dir>/dm_history_report_en.md)")
ap = argparse.ArgumentParser(description="Generate a short, safe DM history report from an analysis directory.")
ap.add_argument("--analysis-dir", required=True, help="analyze_instagram_export output directory")
ap.add_argument("--out", default=None, help="output markdown path (default: dm_history_report_en.md in CWD)")
args = ap.parse_args(argv)
analysis_dir = Path(args.analysis_dir)
out_path = Path(args.out) if args.out else (analysis_dir / "dm_history_report_en.md")
out_path = Path(args.out) if args.out else (Path.cwd() / "dm_history_report_en.md")
try:
p = generate_report(analysis_dir=analysis_dir, out_path=out_path)
print(json.dumps({"ok": True, "out": str(p)}, ensure_ascii=False))
return 0
except FileNotFoundError as e:
print(f"Missing required input: {e}", file=os.sys.stderr)
print(f"Missing analysis input: {e}", file=os.sys.stderr)
return 2
except Exception as e:
print(f"Report generation failed: {e}", file=os.sys.stderr)

View file

@ -13,7 +13,7 @@ from typing import Any, Iterable, Literal
from .analyze_instagram_export import canonicalize_text
DEFAULT_LOCAL_TZ_NAME = "Europe/Brussels"
DEFAULT_LOCAL_TZ_NAME = "Europe/Paris"
def _safe_chmod_600(path: Path) -> None:
@ -418,11 +418,11 @@ def _question_theme(text: str) -> str | None:
toks = s_compact.split()
if len(toks) == 1:
w = toks[0]
if w in {"book", "ebook", "libro", "pdf"}:
if w in {"book", "ebook", "libro", "pdf", "livre", "llibre"}:
return "Just one word: book"
if w in {"link", "enlace"}:
if w in {"link", "enlace", "lien", "enllac", "enllaç"}:
return "Just one word: link"
if w in {"price", "precio", "cost"}:
if w in {"price", "precio", "cost", "prix", "preu"}:
return "Just one word: price"
# "I tried, but it didn't arrive / it doesn't work"
@ -499,15 +499,48 @@ def _question_theme(text: str) -> str | None:
return "Where are you based?"
# Price / cost
if any(k in s for k in ("price", "cost", "how much", "$", "", "usd", "eur", "precio", "cuanto", "cuánto", "caro")):
if any(
k in s
for k in (
"price",
"cost",
"how much",
"$",
"",
"usd",
"eur",
"precio",
"cuanto",
"cuánto",
"caro",
"prix",
"preu",
)
):
return "What does it cost?"
# Link / payment link
if any(k in s for k in ("link", "send the link", "send me the link", "where is the link", "enlace", "stripe", "paypal", "checkout", "invoice")):
if any(
k in s
for k in (
"link",
"send the link",
"send me the link",
"where is the link",
"enlace",
"lien",
"enllaç",
"enllac",
"stripe",
"paypal",
"checkout",
"invoice",
)
):
return "Can you send the link?"
# Book / ebook / pdf
if any(k in s for k in ("book", "ebook", "e-book", "pdf", "libro")):
if any(k in s for k in ("book", "ebook", "e-book", "pdf", "libro", "livre", "llibre")):
return "Where do I get the book?"
# Call / schedule
@ -533,7 +566,7 @@ def _question_theme(text: str) -> str | None:
return "How do I book a call?"
# Video
if any(k in s for k in ("video", "vídeo", "youtube")):
if any(k in s for k in ("video", "vídeo", "vidéo", "youtube")):
return "Can you send the video?"
# Steps / what next
@ -541,11 +574,45 @@ def _question_theme(text: str) -> str | None:
return "What are the steps?"
# How it works / details
if any(k in s for k in ("how does", "how it works", "how does it work", "how does this work", "como funciona", "cómo funciona", "more info", "details", "explain")):
if any(
k in s
for k in (
"how does",
"how it works",
"how does it work",
"how does this work",
"como funciona",
"cómo funciona",
"more info",
"details",
"explain",
"comment ça marche",
"ça marche",
"com funciona",
)
):
return "How does it work?"
# What you do / what is this
if any(k in s for k in ("what is this", "what do you do", "what is it", "what do you offer", "service", "services", "que es", "qué es", "que haces", "qué haces", "de que va", "de qué va")):
if any(
k in s
for k in (
"what is this",
"what do you do",
"what is it",
"what do you offer",
"service",
"services",
"que es",
"qué es",
"que haces",
"qué haces",
"de que va",
"de qué va",
"c'est quoi",
"cest quoi",
)
):
return "What is this?"
# Trust / legitimacy
@ -597,6 +664,8 @@ def _offer_terms(text: str) -> set[str]:
("ebook", "Ebook"),
("e-book", "Ebook"),
("libro", "Book"),
("livre", "Book"),
("llibre", "Book"),
("pdf", "PDF"),
("call", "Call"),
("llamada", "Call"),
@ -935,20 +1004,17 @@ def generate_report(
now = datetime.now(timezone.utc).date().isoformat()
report: list[str] = []
report.append("# Socialmediatorr Instagram DM History : Plain-English Deep Report")
report.append("# Instagram DM History — Plain-English Deep Report")
report.append("")
report.append("## DM History Deep Report")
report.append("## What This Is")
report.append("")
report.append(f"**Subject:** Instagram direct messages for `@socialmediatorr`")
report.append("**Version:** v1.0 (STYLE BIBLE EN 3.0GM)")
report.append(f"**Inbox:** `@socialmediatorr`")
report.append(f"**Date:** {now}")
report.append("**Status:** REVIEW REQUIRED")
report.append("**Citation:** `if://report/socialmediatorr/instagram/dm-history/`")
report.append("**Author:** Danny Stocker | InfraFabric Research")
report.append("**Time zone used:** CET")
report.append("")
report.append("### How This Report Was Made")
report.append("")
report.append("> This is an automated count of patterns. It is not a therapy note and it is not a sales ledger.")
report.append("> This is a count of patterns. It is not a therapy note and it is not a sales ledger.")
report.append("")
report.append(
"This document was generated by reading an Instagram data export and counting repeat patterns over time. "
@ -957,9 +1023,9 @@ def generate_report(
report.append("")
report.append("---")
report.append("")
report.append(f"**Context:** This inbox contains a high-volume message-and-reply system over {window_days} days.")
report.append(f"**Context:** This inbox contains message history over {window_days} days.")
report.append("")
report.append("> Your messaging system is working as a volume engine. The weak point is consistency at the moments where people ask to buy or book.")
report.append("> The system works at scale. The weak point is the “next step” moments: when people ask what to do, what it costs, or where to get it.")
report.append("")
report.append(
"The purpose of this report is practical: define what to keep, what to remove, and what to automate safely—without damaging trust."

19
tools/README.md Normal file
View file

@ -0,0 +1,19 @@
# Tools
## Mermaid checks (Markdown diagrams)
This repo uses Mermaid diagrams in Markdown reports.
Local lint (partial):
- `npm install`
- `npm run verify:mermaid`
Notes:
- This check validates diagram types supported by `@mermaid-js/parser` (for example: `pie`).
- Some diagram types (for example: `flowchart`) are not supported by that parser yet and will be reported as `skipped`.
Full validation (recommended):
- Use Forgejo's built-in PDF export for the report file. If the PDF export succeeds, the diagrams compiled successfully.

109
tools/verify_mermaid.mjs Normal file
View file

@ -0,0 +1,109 @@
import fs from "node:fs";
import path from "node:path";
import process from "node:process";
import { parse } from "@mermaid-js/parser";
/**
 * Report whether a path looks like a Markdown document.
 * The check is case-insensitive and purely extension-based.
 *
 * @param {string} filePath - file path (absolute or relative)
 * @returns {boolean} true for `.md` / `.markdown` files
 */
function isMarkdownFile(filePath) {
  const normalized = filePath.toLowerCase();
  return [".md", ".markdown"].some((ext) => normalized.endsWith(ext));
}
/**
 * Recursively yield every regular file under `rootPath`.
 * If `rootPath` is itself a file, yield just that path.
 *
 * Entries that are neither files nor directories (e.g. symlinks as reported
 * by Dirent) are ignored. Throws if `rootPath` does not exist.
 *
 * @param {string} rootPath - file or directory to walk
 * @yields {string} path of each file found
 */
function* walkFiles(rootPath) {
  if (fs.statSync(rootPath).isFile()) {
    yield rootPath;
    return;
  }
  for (const entry of fs.readdirSync(rootPath, { withFileTypes: true })) {
    const childPath = path.join(rootPath, entry.name);
    if (entry.isDirectory()) {
      yield* walkFiles(childPath);
    } else if (entry.isFile()) {
      yield childPath;
    }
  }
}
/**
 * Extract the inner source of every ```mermaid fenced code block.
 *
 * The pattern is non-greedy, so adjacent blocks are matched separately.
 * A fresh regex literal is used on each call, so there is no shared
 * `lastIndex` state between invocations.
 *
 * @param {string} markdownText - full Markdown document text
 * @returns {string[]} one entry per mermaid block (may be empty strings)
 */
function extractMermaidBlocks(markdownText) {
  const fencePattern = /```mermaid\s*([\s\S]*?)```/g;
  return Array.from(markdownText.matchAll(fencePattern), (match) => match[1] ?? "");
}
/**
 * Guess the mermaid diagram type from the first meaningful line of a block.
 *
 * Blank lines and `%%` comment lines are ignored. Prefix matching (rather
 * than exact token matching) is intentional so variants such as
 * `architecture-beta` or `packet-beta` are still recognized.
 *
 * @param {string|null|undefined} code - mermaid source of one block
 * @returns {string|null} a type supported by @mermaid-js/parser, or null
 *   for unsupported types (e.g. flowchart/sequence/class) and empty input
 */
function detectDiagramType(code) {
  const meaningfulLines = String(code || "")
    .replace(/\r\n?/g, "\n")
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line !== "" && !line.startsWith("%%"));
  if (meaningfulLines.length === 0) return null;

  const header = meaningfulLines[0];
  const supportedPrefixes = ["pie", "gitGraph", "architecture", "packet", "info", "radar", "treemap"];
  // Not supported by @mermaid-js/parser yet (example: flowchart/sequence/class).
  return supportedPrefixes.find((prefix) => header.startsWith(prefix)) ?? null;
}
/**
 * Scan Markdown files under the given roots (CLI args; default: `reports`)
 * and syntax-check every ```mermaid fenced block whose diagram type is
 * supported by @mermaid-js/parser.
 *
 * On success prints a JSON summary to stdout; on failure prints one line
 * per bad diagram plus a JSON summary to stderr.
 *
 * @returns {Promise<number>} exit code: 0 = all parsed, 1 = any failure
 */
async function main() {
  const args = process.argv.slice(2);
  const roots = args.length ? args : ["reports"];
  let ok = true;
  let total = 0;
  let failures = 0;
  let skipped = 0;
  for (const root of roots) {
    // Materialize the walk per root so a missing/unreadable root is reported
    // cleanly instead of rejecting main() with an unhandled fs exception.
    let filePaths;
    try {
      filePaths = [...walkFiles(root)];
    } catch (err) {
      ok = false;
      console.error(`[mermaid] cannot read root "${root}": ${errorMessage(err)}`);
      continue;
    }
    for (const filePath of filePaths) {
      if (!isMarkdownFile(filePath)) continue;
      const text = fs.readFileSync(filePath, "utf8");
      const blocks = extractMermaidBlocks(text);
      if (!blocks.length) continue;
      for (let i = 0; i < blocks.length; i++) {
        const code = String(blocks[i] || "").trim();
        total += 1;
        const diagramType = detectDiagramType(code);
        if (!diagramType) {
          // Diagram types the parser cannot handle yet are counted, not failed.
          skipped += 1;
          continue;
        }
        try {
          await parse(diagramType, code);
        } catch (err) {
          ok = false;
          failures += 1;
          console.error(`[mermaid] ${filePath} block=${i + 1} type=${diagramType}: ${errorMessage(err)}`);
        }
      }
    }
  }
  if (ok) {
    console.log(JSON.stringify({ ok: true, diagrams: total, skipped }, null, 2));
    return 0;
  }
  console.error(JSON.stringify({ ok: false, diagrams: total, failures, skipped }, null, 2));
  return 1;
}

/** Best-effort extraction of a human-readable message from a thrown value. */
function errorMessage(err) {
  return err && typeof err === "object" && "message" in err ? String(err.message) : String(err);
}
// ESM entry point: top-level await runs the scan and propagates its exit code.
process.exitCode = await main();