diff --git a/.gitignore b/.gitignore index 2e7a1fa..0804b90 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,7 @@ venv/ .vscode/ /dist/ /build/ - +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* diff --git a/README.md b/README.md index b973104..122c38e 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,12 @@ This produces the “Sergio persona” artifacts needed for the DM agent: Outputs are written with mode `600` and may contain sensitive DM content. Keep them out of git. +This repo includes **sanitized** example reports (no verbatim client DMs) under: + +- `reports/socialmediatorr/` + +Raw analysis artifacts (e.g., training pairs, rescued threads, template caches) should remain in a private working directory such as `/root/tmp/` and should not be committed. + ### Analyze a raw Instagram export folder (recommended) Optional: index first (lets you filter recency without scanning every thread): diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..a0659ad --- /dev/null +++ b/package-lock.json @@ -0,0 +1,174 @@ +{ + "name": "emo-social-insta-dm-agent-tools", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "emo-social-insta-dm-agent-tools", + "devDependencies": { + "@mermaid-js/parser": "^0.6.3" + } + }, + "node_modules/@chevrotain/cst-dts-gen": { + "version": "11.0.3", + "resolved": "https://registry.npmjs.org/@chevrotain/cst-dts-gen/-/cst-dts-gen-11.0.3.tgz", + "integrity": "sha512-BvIKpRLeS/8UbfxXxgC33xOumsacaeCKAjAeLyOn7Pcp95HiRbrpl14S+9vaZLolnbssPIUuiUd8IvgkRyt6NQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@chevrotain/gast": "11.0.3", + "@chevrotain/types": "11.0.3", + "lodash-es": "4.17.21" + } + }, + "node_modules/@chevrotain/gast": { + "version": "11.0.3", + "resolved": "https://registry.npmjs.org/@chevrotain/gast/-/gast-11.0.3.tgz", + "integrity": "sha512-+qNfcoNk70PyS/uxmj3li5NiECO+2YKZZQMbmjTqRI3Qchu8Hig/Q9vgkHpI3alNjr7M+a2St5pw5w5F6NL5/Q==", + 
"dev": true, + "license": "Apache-2.0", + "dependencies": { + "@chevrotain/types": "11.0.3", + "lodash-es": "4.17.21" + } + }, + "node_modules/@chevrotain/regexp-to-ast": { + "version": "11.0.3", + "resolved": "https://registry.npmjs.org/@chevrotain/regexp-to-ast/-/regexp-to-ast-11.0.3.tgz", + "integrity": "sha512-1fMHaBZxLFvWI067AVbGJav1eRY7N8DDvYCTwGBiE/ytKBgP8azTdgyrKyWZ9Mfh09eHWb5PgTSO8wi7U824RA==", + "dev": true, + "license": "Apache-2.0" + }, + "node_modules/@chevrotain/types": { + "version": "11.0.3", + "resolved": "https://registry.npmjs.org/@chevrotain/types/-/types-11.0.3.tgz", + "integrity": "sha512-gsiM3G8b58kZC2HaWR50gu6Y1440cHiJ+i3JUvcp/35JchYejb2+5MVeJK0iKThYpAa/P2PYFV4hoi44HD+aHQ==", + "dev": true, + "license": "Apache-2.0" + }, + "node_modules/@chevrotain/utils": { + "version": "11.0.3", + "resolved": "https://registry.npmjs.org/@chevrotain/utils/-/utils-11.0.3.tgz", + "integrity": "sha512-YslZMgtJUyuMbZ+aKvfF3x1f5liK4mWNxghFRv7jqRR9C3R3fAOGTTKvxXDa2Y1s9zSbcpuO0cAxDYsc9SrXoQ==", + "dev": true, + "license": "Apache-2.0" + }, + "node_modules/@mermaid-js/parser": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/@mermaid-js/parser/-/parser-0.6.3.tgz", + "integrity": "sha512-lnjOhe7zyHjc+If7yT4zoedx2vo4sHaTmtkl1+or8BRTnCtDmcTpAjpzDSfCZrshM5bCoz0GyidzadJAH1xobA==", + "dev": true, + "license": "MIT", + "dependencies": { + "langium": "3.3.1" + } + }, + "node_modules/chevrotain": { + "version": "11.0.3", + "resolved": "https://registry.npmjs.org/chevrotain/-/chevrotain-11.0.3.tgz", + "integrity": "sha512-ci2iJH6LeIkvP9eJW6gpueU8cnZhv85ELY8w8WiFtNjMHA5ad6pQLaJo9mEly/9qUyCpvqX8/POVUTf18/HFdw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@chevrotain/cst-dts-gen": "11.0.3", + "@chevrotain/gast": "11.0.3", + "@chevrotain/regexp-to-ast": "11.0.3", + "@chevrotain/types": "11.0.3", + "@chevrotain/utils": "11.0.3", + "lodash-es": "4.17.21" + } + }, + "node_modules/chevrotain-allstar": { + "version": "0.3.1", + "resolved": 
"https://registry.npmjs.org/chevrotain-allstar/-/chevrotain-allstar-0.3.1.tgz", + "integrity": "sha512-b7g+y9A0v4mxCW1qUhf3BSVPg+/NvGErk/dOkrDaHA0nQIQGAtrOjlX//9OQtRlSCy+x9rfB5N8yC71lH1nvMw==", + "dev": true, + "license": "MIT", + "dependencies": { + "lodash-es": "^4.17.21" + }, + "peerDependencies": { + "chevrotain": "^11.0.0" + } + }, + "node_modules/langium": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/langium/-/langium-3.3.1.tgz", + "integrity": "sha512-QJv/h939gDpvT+9SiLVlY7tZC3xB2qK57v0J04Sh9wpMb6MP1q8gB21L3WIo8T5P1MSMg3Ep14L7KkDCFG3y4w==", + "dev": true, + "license": "MIT", + "dependencies": { + "chevrotain": "~11.0.3", + "chevrotain-allstar": "~0.3.0", + "vscode-languageserver": "~9.0.1", + "vscode-languageserver-textdocument": "~1.0.11", + "vscode-uri": "~3.0.8" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/lodash-es": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.17.21.tgz", + "integrity": "sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw==", + "dev": true, + "license": "MIT" + }, + "node_modules/vscode-jsonrpc": { + "version": "8.2.0", + "resolved": "https://registry.npmjs.org/vscode-jsonrpc/-/vscode-jsonrpc-8.2.0.tgz", + "integrity": "sha512-C+r0eKJUIfiDIfwJhria30+TYWPtuHJXHtI7J0YlOmKAo7ogxP20T0zxB7HZQIFhIyvoBPwWskjxrvAtfjyZfA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/vscode-languageserver": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/vscode-languageserver/-/vscode-languageserver-9.0.1.tgz", + "integrity": "sha512-woByF3PDpkHFUreUa7Hos7+pUWdeWMXRd26+ZX2A8cFx6v/JPTtd4/uN0/jB6XQHYaOlHbio03NTHCqrgG5n7g==", + "dev": true, + "license": "MIT", + "dependencies": { + "vscode-languageserver-protocol": "3.17.5" + }, + "bin": { + "installServerIntoExtension": "bin/installServerIntoExtension" + } + }, + "node_modules/vscode-languageserver-protocol": { + 
"version": "3.17.5", + "resolved": "https://registry.npmjs.org/vscode-languageserver-protocol/-/vscode-languageserver-protocol-3.17.5.tgz", + "integrity": "sha512-mb1bvRJN8SVznADSGWM9u/b07H7Ecg0I3OgXDuLdn307rl/J3A9YD6/eYOssqhecL27hK1IPZAsaqh00i/Jljg==", + "dev": true, + "license": "MIT", + "dependencies": { + "vscode-jsonrpc": "8.2.0", + "vscode-languageserver-types": "3.17.5" + } + }, + "node_modules/vscode-languageserver-textdocument": { + "version": "1.0.12", + "resolved": "https://registry.npmjs.org/vscode-languageserver-textdocument/-/vscode-languageserver-textdocument-1.0.12.tgz", + "integrity": "sha512-cxWNPesCnQCcMPeenjKKsOCKQZ/L6Tv19DTRIGuLWe32lyzWhihGVJ/rcckZXJxfdKCFvRLS3fpBIsV/ZGX4zA==", + "dev": true, + "license": "MIT" + }, + "node_modules/vscode-languageserver-types": { + "version": "3.17.5", + "resolved": "https://registry.npmjs.org/vscode-languageserver-types/-/vscode-languageserver-types-3.17.5.tgz", + "integrity": "sha512-Ld1VelNuX9pdF39h2Hgaeb5hEZM2Z3jUrrMgWQAu82jMtZp7p3vJT3BzToKtZI7NgQssZje5o0zryOrhQvzQAg==", + "dev": true, + "license": "MIT" + }, + "node_modules/vscode-uri": { + "version": "3.0.8", + "resolved": "https://registry.npmjs.org/vscode-uri/-/vscode-uri-3.0.8.tgz", + "integrity": "sha512-AyFQ0EVmsOZOlAnxoFOGOq1SQDWAB7C6aqMGS23svWAllfOaxbuFvcT8D1i8z3Gyn8fraVeZNNmN6e9bxxXkKw==", + "dev": true, + "license": "MIT" + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..53d2d3f --- /dev/null +++ b/package.json @@ -0,0 +1,11 @@ +{ + "name": "emo-social-insta-dm-agent-tools", + "private": true, + "type": "module", + "devDependencies": { + "@mermaid-js/parser": "^0.6.3" + }, + "scripts": { + "verify:mermaid": "node tools/verify_mermaid.mjs" + } +} diff --git a/reports/socialmediatorr/dm_history_report_en.md b/reports/socialmediatorr/dm_history_report_en.md index 356f63f..4d3fad4 100644 --- a/reports/socialmediatorr/dm_history_report_en.md +++ b/reports/socialmediatorr/dm_history_report_en.md @@ -1,163 +1,42 @@ 
-# Socialmediatorr Instagram DM History — Human Readable Report (English) +# Instagram DM History — Short Report (English) - Generated: `2025-12-24T02:28:34+00:00` -- Owner name used: `Sergio de Vocht` +- Inbox: `@socialmediatorr` -## 1) What This Dataset Represents +## What This Is -This is an all-time audit of Instagram DM conversations for `@socialmediatorr`, focused on extracting repeatable sales + support behavior so an AI agent can reply in Sergio’s style. -The analysis treats the account as a hybrid system: frequent repeated templates (likely automation/scripts) plus lower-frequency custom replies (human Sergio). +This is a short, plain-English summary of the DM history scan. +It avoids quoting private messages and it avoids storing personal identities. -## 2) High-Level Metrics (All-Time) +## Key Numbers - Conversations analyzed: **10,061** -- Bot-only conversations: **1,883** (18.7%) -- Human-intervened conversations: **8,153** (81.0%) -- Conversion (intent signals): **1,923** (19.1%) -- Conversion (confirmed signals): **55** (0.5%) +- Conversations that stayed template-only: **1,883** (18.7%) +- Conversations that included custom replies: **8,153** (81.0%) +- Buying/booking signals (weak): **1,923** (19.1%) +- Buying/booking signals (strong): **55** (0.5%) -Notes on conversion: this uses heuristics (keywords + payment/link mentions). It is directionally useful for ranking scripts, but it is not a ground-truth revenue ledger. +Buying/booking signals are detected from text patterns (they are not a payment ledger). -## 3) Sergio Persona (From Manual/Hybrid Replies) +## What You Need to Know -- Typical reply length: median **60.0** chars (p90 **67.0**) -- Questions: **2.4%** | Exclamations: **1.7%** | Emoji: **0.0%** -- Language guess (manual replies): en=8043, es=423, unknown=224 +The fastest improvements come from standardizing answers to repeated questions and sending them in the right time blocks. 
+For the full deep report (CET timing, day-of-week patterns, Top 20 questions, and concrete actions), read: -Practical implication for an agent: short, direct replies; minimal punctuation; bilingual capability; low/no emoji usage. +- `reports/socialmediatorr/dm_history_report_en_detailed.md` -## 4) Bot vs Human Segmentation (What It Means) +## Useful Inventory (Safe Counts Only) -- **[BOT]** = outgoing message template repeated frequently (>= configured threshold). -- **[MANUAL]** = outgoing message that is rare/unique (<= configured threshold). -- **[HYBRID]** = messages that look like a bot template but with manual edits (prefix match/similarity). +- Total outgoing templates detected: **8,550** +- High-frequency repeat templates: **24** +- “Rescue” events detected: **7** +- Training pairs (user → reply) available: **524** -This separation is the foundation for: (1) extracting safe reusable scripts, and (2) extracting human-only replies as training data for a RAG or fine-tune. +## What You Do Not Need to Know -## 5) Top Detected Script Templates (Canonicalized) - -- BOT #1: sent **2495**× — `crees que es necesario hoy en dã a llevar a cabo este desarrollo colectivo` -- BOT #2: sent **2483**× — `perfecto aquã no hablamos de â mejorar solo a uno mismoâ sino de algo mucho mã s profundo estar bien con los demã s para poder estar bien contigo mism` -- BOT #3: sent **2483**× — `te lo dejo por aquã dame un minuto` -- BOT #4: sent **2483**× — `me gustarã a saber tu opiniã³n` -- BOT #5: sent **1878**× — `me alegro de que quieras seguir aprendiendo ð ª te dejo por aquã el ebook â conceptos de desarrollo personal que te estã n quitando paz` -- BOT #6: sent **1878**× — `no es para que lo leas en modo teorã a sino para que puedas detectar ideas que llevas tiempo aplicando y que sin darte cuenta estã n influyendo en tus relacione` -- BOT #7: sent **706**× — `gracias por ese feedback ð` -- BOT #8: sent **706**× — `como agradecimiento por seguirme quiero regalarte un video 
exclusivo que te ayude a empezar este cambio dime â dã³nde sientes que hay mã s conflicto ãºltimament` -- BOT #9: sent **680**× — `you sent a private reply to a comment on your instagram post` -- BOT #10: sent **469**× — `por cierto` - -## 6) Human Reply Library (Rare/Manual Examples, Canonicalized) - -- MANUAL-ish #1: seen **10**× — `quã bonito leer eso a veces entender las palabras abre puertas nuevas â sientes que en tu entorno hay algo que te gustarã a armonizar mã s` -- MANUAL-ish #2: seen **7**× — `buenas aquã sergio ð gracias por responder he preparado un video con muchã simo valor para ayudarte a acabar con esos desbordes y vivir en paz solo tienes que c` -- MANUAL-ish #3: seen **5**× — `hola buenas como estas ð espero que estã s bien me gustarã a saber que es lo q te ha echo estar aquã y querer saber mã s sobre nuestras formaciã³n` -- MANUAL-ish #4: seen **5**× — `y si pudieras resolver esto cã³mo crees que cambiarã a tu forma de relacionarte o sentirte` -- MANUAL-ish #5: seen **5**× — `para conocerte un poquito mã s que te gustarã a conseguir con emosocial cual es tu mayor desafã o actualmente dentro de tus relaciones` -- MANUAL-ish #6: seen **4**× — `okey te entiendo perfectamente ð segãºn lo que me comentas creo que esta lista de videos de youtube te va a venir genial para empezar a entender las bases del c` -- MANUAL-ish #7: seen **4**× — `buenas aquã sergio ð gracias por responder he preparado un video con muchã simo valor para ayudarte a acabar con esos desbordes y vivir en paz solo tienes que c` -- MANUAL-ish #8: seen **3**× — `hola buenas como estas espero que bien cuã ntame que te parece el contenido que estamos ofreciendo por whatsapp te leoð ð` - -## 7) Bot Template Performance (Reply/Conversion Heuristics) - -These come from `bot_performance_audit.csv` and are computed per canonical bot template. 
- -### Most-used bot templates (by volume) -- sent=2495 reply_rate=0.376 intent_rate=0.0766 confirmed_rate=0.012 — `crees que es necesario hoy en dã a llevar a cabo este desarrollo colectivo` -- sent=2483 reply_rate=0.0334 intent_rate=0.0769 confirmed_rate=0.0121 — `perfecto aquã no hablamos de â mejorar solo a uno mismoâ sino de algo mucho mã s profundo estar bien con los demã s para poder estar bien co` -- sent=2483 reply_rate=0.1188 intent_rate=0.0769 confirmed_rate=0.0121 — `te lo dejo por aquã dame un minuto` -- sent=2483 reply_rate=0.0028 intent_rate=0.0769 confirmed_rate=0.0121 — `me gustarã a saber tu opiniã³n` -- sent=1878 reply_rate=0.0 intent_rate=0.0 confirmed_rate=0.0005 — `me alegro de que quieras seguir aprendiendo ð ª te dejo por aquã el ebook â conceptos de desarrollo personal que te estã n quitando pa` -- sent=1878 reply_rate=0.1768 intent_rate=0.0 confirmed_rate=0.0005 — `no es para que lo leas en modo teorã a sino para que puedas detectar ideas que llevas tiempo aplicando y que sin darte cuenta estã n influye` -- sent=706 reply_rate=0.0042 intent_rate=0.1048 confirmed_rate=0.017 — `gracias por ese feedback ð` -- sent=706 reply_rate=0.8187 intent_rate=0.1048 confirmed_rate=0.017 — `como agradecimiento por seguirme quiero regalarte un video exclusivo que te ayude a empezar este cambio dime â dã³nde sientes que hay mã s c` - -### Best reply-rate bot templates -- reply_rate=0.8187 sent=706 — `como agradecimiento por seguirme quiero regalarte un video exclusivo que te ayude a empezar este cambio dime â dã³nde sientes que hay mã s c` -- reply_rate=0.7143 sent=98 — `pudiste entrar correctamente` -- reply_rate=0.7022 sent=178 — `por favor toca una de las siguientes opciones ð` -- reply_rate=0.4701 sent=134 — `pudiste verlo` -- reply_rate=0.4602 sent=176 — `que te pareciã³ ese diccionario hay alguna palabra que sueles utilizar y no te habã as dado cuenta` -- reply_rate=0.376 sent=2495 — `crees que es necesario hoy en dã a llevar a cabo este desarrollo 
colectivo` -- reply_rate=0.3458 sent=240 — `gracias por tu sinceridad ð` -- reply_rate=0.3291 sent=158 — `te dejo este video donde explico por quã las relaciones de pareja entran en conflicto aunque haya amor` - -### Worst reply-rate bot templates -- reply_rate=0.0 sent=1878 — `me alegro de que quieras seguir aprendiendo ð ª te dejo por aquã el ebook â conceptos de desarrollo personal que te estã n quitando pa` -- reply_rate=0.0 sent=337 — `enhorabuena por querer dar ese cambio estã s a un paso de transformar tu relaciã³n en solo dã as te invito a un taller exclusivo donde` -- reply_rate=0.0 sent=158 — `gracias por compartirlo â ï` -- reply_rate=0.0 sent=131 — `entiendo perfectamente ð` -- reply_rate=0.0 sent=54 — `this account can t receive your message because they don t allow new message requests from everyone` -- reply_rate=0.0028 sent=2483 — `me gustarã a saber tu opiniã³n` -- reply_rate=0.0042 sent=706 — `gracias por ese feedback ð` -- reply_rate=0.0334 sent=2483 — `perfecto aquã no hablamos de â mejorar solo a uno mismoâ sino de algo mucho mã s profundo estar bien con los demã s para poder estar bien co` - -## 8) Objections → Best Sergio Replies (Playbook) - -### price -- (1) Ey Alex que tal -- (1) Qué bonito leer eso. A veces entender las palabras abre puertas nuevas. ¿Sientes que en tu entorno hay algo que te gustaría armonizar más? -- (1) Y que es lo que te impide dar ese cambio? Te veo con mucha seguridad -### time -- (1) Brutal esto que dices -- (1) No es una herida ELA! Apego que no te dieron tus padres es solo una parte del espectro, necesitamos validación del mundo y de forma constante, no es una herida del pasado es algo que falta darnos en el presente. -- (1) Vaya, suena bastante frustrante el hecho de querer "bajar esa guardia", y sentir que cuando lo haces, todo cambia -### trust -- (2) Hola Dani, gracias por el mensaje bonito, de verdad. 
Me alegra mucho saber que el contenido te está ayudando a mirar las cosas desde otro punto de vista -- (2) Qué bonito leer eso. A veces entender las palabras abre puertas nuevas. ¿Sientes que en tu entorno hay algo que te gustaría armonizar más? -- (2) En la plataforma no sale por ningún lugar, y normalmente siempre llegan 2 emails, 1 de confirmación de pago y otro de bienvenida - -## 9) Rescue / Save Logic (Human Intervention After Silence/Negativity) - -- Rescue events detected (heuristic): **7** -A “rescue” is when a manual/hybrid owner message follows either (a) a user negative signal, or (b) >24h silence after a bot message, and the thread later shows a confirmed conversion signal. - -## 10) Product / Offer Evolution (Eras) - -This is inferred from mentions of pricing/currency + offer terms (e.g., call/audit/coaching) and summarized quarterly. - -Recent quarters (top extracted offer signals): -- stripe(1) -- book(1912); ebook(1912); call(8); calendly(7); coaching(2); stripe(2); pdf(2); paypal(1) - -## 11) Charts - -- Bot fatigue (weekly reply rate to the dominant bot script): `bot_fatigue_chart.png` - -![](bot_fatigue_chart.png) -- Editorial timeline (top bot scripts vs conversions): `editorial_timeline.png` - -![](editorial_timeline.png) - -## 12) What To Build From This (Agent Requirements) - -### Core behavior -- Start with top bot templates for predictable openers and FAQ-style flows. -- Switch to Sergio-style manual patterns on objections, negotiation, or when conversation stalls. -- Use a rescue cadence (time-based triggers) after silence. 
- -### Data products to drive the agent -- Training pairs (manual-only, converted threads): `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/training_pairs.jsonl` (rows: ~524) -- Objection handlers: `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/objection_handlers.json` -- Rescue playbook: `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/rescue_playbook.json` -- Script templates + editorial drift: `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/top_outgoing_templates.json` - -### Safety boundaries (recommended) -- Never request or store passwords/2FA codes. -- Avoid medical/legal/financial advice; redirect to a call or a human. -- If user asks to move off-platform, follow Sergio’s historical policy and business rules. - -## 13) What We Do NOT Need To Know (Ignore / Do Not Store) - -- Exact client identities (names, handles, phone numbers, emails) unless required for operational routing. -- Media attachments (photos/videos/audio) for persona cloning; they add storage cost and privacy risk. -- Full verbatim message dumps for every thread; for RAG you only need high-quality pairs and playbook snippets. -- Individual one-off edge cases that never repeat (unless they represent a safety boundary). -- Internal Meta export folder structure details beyond `messages/inbox/**/message*.json`. - -## 14) Caveats / Gaps - -- The export does not reliably label ManyChat vs Human; bot/human is inferred by repetition and similarity. -- Conversion is heuristic; integrate Stripe/Calendly/CRM events if you want ground-truth attribution. -- Language detection is heuristic; improve it if you need precise bilingual routing. +Do not store or copy these into an automation system unless you have a clear operational reason: +- Names, handles, phone numbers, emails. +- Full conversation transcripts for every thread. +- Photos, videos, audio, and other attachments. +- One-off edge cases that never repeat. 
diff --git a/reports/socialmediatorr/dm_history_report_en_detailed.md b/reports/socialmediatorr/dm_history_report_en_detailed.md index cd09cd1..3c53923 100644 --- a/reports/socialmediatorr/dm_history_report_en_detailed.md +++ b/reports/socialmediatorr/dm_history_report_en_detailed.md @@ -1,25 +1,22 @@ -# Socialmediatorr Instagram DM History : Plain-English Deep Report +# Instagram DM History — Plain-English Deep Report -## DM History Deep Report +## What This Is -**Subject:** Instagram direct messages for `@socialmediatorr` -**Version:** v1.0 (STYLE BIBLE EN 3.0GM) +**Inbox:** `@socialmediatorr` **Date:** 2025-12-24 -**Status:** REVIEW REQUIRED -**Citation:** `if://report/socialmediatorr/instagram/dm-history/` -**Author:** Danny Stocker | InfraFabric Research +**Time zone used:** CET ### How This Report Was Made -> This is an automated count of patterns. It is not a therapy note and it is not a sales ledger. +> This is a count of patterns. It is not a therapy note and it is not a sales ledger. This document was generated by reading an Instagram data export and counting repeat patterns over time. It avoids quoting private client messages and it avoids storing personal identities. --- -**Context:** This inbox contains a high-volume message-and-reply system over 429 days. +**Context:** This inbox contains message history over 429 days. -> Your messaging system is working as a volume engine. The weak point is consistency at the moments where people ask to buy or book. +> The system works at scale. The weak point is the “next step” moments: when people ask what to do, what it costs, or where to get it. The purpose of this report is practical: define what to keep, what to remove, and what to automate safely—without damaging trust. 
@@ -35,7 +32,7 @@ Across the observed window, you sent a very large number of messages and you rec | Total messages | 54,069 | Instagram export | | Messages you sent | 43,607 | Instagram export | | Messages people sent you | 10,462 | Instagram export | -| Messages that look like a question or a request | 2,713 | Instagram export | +| Messages that look like a question or a request | 2,715 | Instagram export | | System messages about new followers (auto text in the inbox) | 8,081 | Instagram export | ### What You Need to Know (In Plain English) @@ -67,7 +64,7 @@ To avoid guesswork, we start with 3-month blocks (a simple way to smooth noise), | 2025 Jan-Mar | 21 | 0 | 0 | | 2025 Apr-Jun | 92 | 97 | 15 | | 2025 Jul-Sep | 623 | 882 | 89 | -| 2025 Oct-Dec | 9,712 | 42,628 | 2,609 | +| 2025 Oct-Dec | 9,712 | 42,628 | 2,611 | Same data as charts: @@ -112,8 +109,8 @@ This month-by-month table is the clearest view of how the inbox changed over tim | 2025-08 | 193 | 230 | 28 | 50.0% | | 2025-09 | 284 | 330 | 24 | 20.8% | | 2025-10 | 787 | 1,190 | 64 | 17.2% | -| 2025-11 | 854 | 2,194 | 149 | 46.3% | -| 2025-12 | 8,071 | 39,244 | 2,396 | 89.6% | +| 2025-11 | 854 | 2,194 | 150 | 46.7% | +| 2025-12 | 8,071 | 39,244 | 2,397 | 89.7% | The busiest month was **2025-12** with **47,315** messages total (87.5% of everything in this export). That single month dominates the shape of the data. @@ -126,7 +123,7 @@ Use this to time follow-ups and first messages. Do not spread effort evenly acro | Day of week | Messages from people | Messages you sent | Questions/requests | |---|---:|---:|---:| | Monday | 1,600 | 8,359 | 131 | -| Tuesday | 1,939 | 9,654 | 192 | +| Tuesday | 1,939 | 9,654 | 194 | | Wednesday | 1,282 | 5,554 | 159 | | Thursday | 2,261 | 6,908 | 1,268 | | Friday | 1,705 | 5,733 | 803 | @@ -184,7 +181,7 @@ One caution: “fast replies” are often repeat messages. 
This section shows ov | Typical time to reply to questions/requests | 2 seconds | Instagram export | | Slow end for questions/requests (90% are faster) | 4 seconds | Instagram export | | Messages from people answered within 48 hours | 7,467 (71.4%) | Instagram export | -| Questions/requests answered within 48 hours | 2,278 (84.0%) | Instagram export | +| Questions/requests answered within 48 hours | 2,280 (84.0%) | Instagram export | Breakdown by message type (repeat messages vs custom messages): @@ -242,11 +239,11 @@ This list is grouped by meaning (not by exact wording). It includes very short r | Rank | Topic (plain English) | Count | Share of all questions/requests | |---:|---|---:|---:| | 1 | Just one word: book | 1,857 | 68.4% | -| 2 | What is this? | 206 | 7.6% | -| 3 | Can you send the video? | 191 | 7.0% | -| 4 | Other question | 120 | 4.4% | +| 2 | What is this? | 203 | 7.5% | +| 3 | Can you send the video? | 189 | 7.0% | +| 4 | Other question | 118 | 4.3% | | 5 | Can you help me? | 74 | 2.7% | -| 6 | Can you send the link? | 61 | 2.2% | +| 6 | Can you send the link? | 70 | 2.6% | | 7 | What does it cost? | 53 | 2.0% | | 8 | Is this therapy? | 44 | 1.6% | | 9 | Where do I get the book? | 36 | 1.3% | @@ -261,12 +258,12 @@ This list is grouped by meaning (not by exact wording). It includes very short r | 18 | Can I get a refund? | 1 | 0.0% | | 19 | How long does it take? | 1 | 0.0% | -In plain terms: **1,893** of **2,713** questions/requests are about the book (69.8%). +In plain terms: **1,893** of **2,715** questions/requests are about the book (69.7%). 
```mermaid pie title Questions/Requests: Book vs Everything Else "Book" : 1893 - "Everything else" : 820 + "Everything else" : 822 ``` ### Content Patterns (What You Mention When You Sell) diff --git a/sergio_instagram_messaging/generate_dm_report.py b/sergio_instagram_messaging/generate_dm_report.py index d24fba1..4685646 100644 --- a/sergio_instagram_messaging/generate_dm_report.py +++ b/sergio_instagram_messaging/generate_dm_report.py @@ -1,10 +1,8 @@ from __future__ import annotations import argparse -import csv import json import os -import statistics from dataclasses import dataclass from pathlib import Path from typing import Any @@ -21,11 +19,6 @@ def _load_json(path: Path) -> dict[str, Any]: return json.loads(path.read_text(encoding="utf-8", errors="replace")) -def _read_csv(path: Path) -> list[dict[str, str]]: - with path.open("r", encoding="utf-8", newline="") as f: - return list(csv.DictReader(f)) - - def _count_jsonl(path: Path, *, max_lines: int = 5_000_000) -> int: n = 0 with path.open("r", encoding="utf-8", errors="replace") as f: @@ -36,284 +29,115 @@ def _count_jsonl(path: Path, *, max_lines: int = 5_000_000) -> int: return n -def _pct(x: float) -> str: - return f"{x*100:.1f}%" +def _pct(num: int, den: int) -> str: + return "n/a" if den <= 0 else f"{(num/den)*100:.1f}%" @dataclass(frozen=True) class ReportInputs: summary: Path templates: Path - bot_audit: Path - objections: Path rescue: Path - eras: Path training_pairs: Path - fatigue_png: Path - editorial_png: Path def _resolve_inputs(analysis_dir: Path) -> ReportInputs: return ReportInputs( summary=analysis_dir / "summary.json", templates=analysis_dir / "top_outgoing_templates.json", - bot_audit=analysis_dir / "bot_performance_audit.csv", - objections=analysis_dir / "objection_handlers.json", rescue=analysis_dir / "rescue_playbook.json", - eras=analysis_dir / "sergio_eras.csv", training_pairs=analysis_dir / "training_pairs.jsonl", - fatigue_png=analysis_dir / "bot_fatigue_chart.png", - 
editorial_png=analysis_dir / "editorial_timeline.png", ) def generate_report(*, analysis_dir: Path, out_path: Path) -> Path: inp = _resolve_inputs(analysis_dir) - for p in inp.__dict__.values(): - if not Path(p).exists(): - raise FileNotFoundError(str(p)) + if not inp.summary.exists(): + raise FileNotFoundError(str(inp.summary)) summary = _load_json(inp.summary) - templates = _load_json(inp.templates) - objections = _load_json(inp.objections) - rescues = _load_json(inp.rescue) - bot_audit = _read_csv(inp.bot_audit) - owner = summary.get("owner_name") or "Unknown" conv = summary.get("conversations") or {} conv_total = int(conv.get("total") or 0) - bot_only = int(conv.get("bot_only") or 0) - human = int(conv.get("human_intervened") or 0) - conversions = summary.get("conversions") or {} - conv_intent = int(conversions.get("intent") or 0) - conv_confirmed = int(conversions.get("confirmed") or 0) + template_only = int(conv.get("bot_only") or 0) + custom_replies = int(conv.get("human_intervened") or 0) - bot_only_rate = (bot_only / conv_total) if conv_total else 0.0 - human_rate = (human / conv_total) if conv_total else 0.0 - intent_rate = (conv_intent / conv_total) if conv_total else 0.0 - confirmed_rate = (conv_confirmed / conv_total) if conv_total else 0.0 + buying = summary.get("conversions") or {} + buying_weak = int(buying.get("intent") or 0) + buying_strong = int(buying.get("confirmed") or 0) - manual_style = summary.get("manual_style") or {} - median_len = manual_style.get("median_len_chars") - p90_len = manual_style.get("p90_len_chars") - question_rate = float(manual_style.get("question_rate") or 0.0) - exclaim_rate = float(manual_style.get("exclaim_rate") or 0.0) - emoji_rate = float(manual_style.get("emoji_rate") or 0.0) - lang_guess = manual_style.get("lang_guess") or {} + templates_total = None + templates_repeat = None + if inp.templates.exists(): + t = _load_json(inp.templates) + templates_total = int(t.get("templates_total") or 0) + templates_repeat = 
int(t.get("bot_templates") or 0) - # Templates: prefer canonical strings (safe-ish) and avoid raw samples. - top_templates = templates.get("top_templates") or [] - top_bot = [t for t in top_templates if isinstance(t, dict) and t.get("label_hint") == "bot"] - top_manual = [t for t in top_templates if isinstance(t, dict) and t.get("label_hint") == "manual"] - - # Bot audit: best/worst by reply_rate. - def fnum(v: str | None) -> float: + rescue_count = None + if inp.rescue.exists(): try: - return float(v or 0) + rescue = _load_json(inp.rescue) + rescue_count = len(rescue) if isinstance(rescue, list) else 0 except Exception: - return 0.0 + rescue_count = None - bot_audit_sorted = sorted(bot_audit, key=lambda r: fnum(r.get("sent")), reverse=True) - top_audit = bot_audit_sorted[:10] - best_reply = sorted(bot_audit, key=lambda r: fnum(r.get("reply_rate")), reverse=True)[:10] - worst_reply = sorted(bot_audit, key=lambda r: fnum(r.get("reply_rate")))[:10] + pairs_count = _count_jsonl(inp.training_pairs, max_lines=2_000_000) if inp.training_pairs.exists() else None - # Objections: most common replies per category. - objection_blocks: list[str] = [] - if isinstance(objections, dict): - for cat in ("price", "time", "trust", "stop"): - replies = objections.get(cat) or [] - if not isinstance(replies, list) or not replies: - continue - top3 = [] - for r in replies[:3]: - if not isinstance(r, dict): - continue - top3.append(f"- ({r.get('count')}) {r.get('reply')}") - if top3: - objection_blocks.append(f"### {cat}\n" + "\n".join(top3)) + generated_at = summary.get("generated_at") if isinstance(summary.get("generated_at"), str) else None - rescue_count = len(rescues) if isinstance(rescues, list) else 0 - pairs_count = _count_jsonl(inp.training_pairs, max_lines=2_000_000) - - # Era summary: simple high-level notes. 
- eras_rows = _read_csv(inp.eras) - era_recent = eras_rows[-6:] if len(eras_rows) > 6 else eras_rows - era_offer_terms: list[str] = [] - for row in era_recent: - offers = (row.get("top_offers") or "").strip() - if offers: - era_offer_terms.append(offers) - - # A few derived notes. - lang_line = ", ".join(f"{k}={v}" for k, v in lang_guess.items()) - - # Summarize bot fatigue trend from image existence only (analysis already made it). - report = [] - report.append("# Socialmediatorr Instagram DM History — Human Readable Report (English)") + report: list[str] = [] + report.append("# Instagram DM History — Short Report (English)") report.append("") - report.append(f"- Generated: `{summary.get('generated_at')}`") - report.append(f"- Owner name used: `{owner}`") + if generated_at: + report.append(f"- Generated: `{generated_at}`") + report.append("- Inbox: `@socialmediatorr`") report.append("") - report.append("## 1) What This Dataset Represents") + report.append("## What This Is") report.append("") - report.append( - "This is an all-time audit of Instagram DM conversations for `@socialmediatorr`, focused on extracting repeatable sales + support behavior so an AI agent can reply in Sergio’s style." - ) - report.append( - "The analysis treats the account as a hybrid system: frequent repeated templates (likely automation/scripts) plus lower-frequency custom replies (human Sergio)." 
- ) + report.append("This is a short, plain-English summary of the DM history scan.") + report.append("It avoids quoting private messages and it avoids storing personal identities.") report.append("") - report.append("## 2) High-Level Metrics (All-Time)") + report.append("## Key Numbers") report.append("") report.append(f"- Conversations analyzed: **{conv_total:,}**") - report.append(f"- Bot-only conversations: **{bot_only:,}** ({_pct(bot_only_rate)})") - report.append(f"- Human-intervened conversations: **{human:,}** ({_pct(human_rate)})") - report.append(f"- Conversion (intent signals): **{conv_intent:,}** ({_pct(intent_rate)})") - report.append(f"- Conversion (confirmed signals): **{conv_confirmed:,}** ({_pct(confirmed_rate)})") + report.append(f"- Conversations that stayed template-only: **{template_only:,}** ({_pct(template_only, conv_total)})") + report.append(f"- Conversations that included custom replies: **{custom_replies:,}** ({_pct(custom_replies, conv_total)})") + report.append(f"- Buying/booking signals (weak): **{buying_weak:,}** ({_pct(buying_weak, conv_total)})") + report.append(f"- Buying/booking signals (strong): **{buying_strong:,}** ({_pct(buying_strong, conv_total)})") report.append("") - report.append( - "Notes on conversion: this uses heuristics (keywords + payment/link mentions). It is directionally useful for ranking scripts, but it is not a ground-truth revenue ledger." 
- ) + report.append("Buying/booking signals are detected from text patterns (they are not a payment ledger).") report.append("") - report.append("## 3) Sergio Persona (From Manual/Hybrid Replies)") + report.append("## What You Need to Know") report.append("") - report.append(f"- Typical reply length: median **{median_len}** chars (p90 **{p90_len}**)") - report.append(f"- Questions: **{_pct(question_rate)}** | Exclamations: **{_pct(exclaim_rate)}** | Emoji: **{_pct(emoji_rate)}**") - report.append(f"- Language guess (manual replies): {lang_line or 'n/a'}") + report.append("The fastest improvements come from standardizing answers to repeated questions and sending them in the right time blocks.") + report.append("For the full deep report (CET timing, day-of-week patterns, Top 20 questions, and concrete actions), read:") report.append("") - report.append("Practical implication for an agent: short, direct replies; minimal punctuation; bilingual capability; low/no emoji usage.") + report.append("- `reports/socialmediatorr/dm_history_report_en_detailed.md`") report.append("") - report.append("## 4) Bot vs Human Segmentation (What It Means)") + report.append("## Useful Inventory (Safe Counts Only)") report.append("") - report.append( - "- **[BOT]** = outgoing message template repeated frequently (>= configured threshold).\n" - "- **[MANUAL]** = outgoing message that is rare/unique (<= configured threshold).\n" - "- **[HYBRID]** = messages that look like a bot template but with manual edits (prefix match/similarity)." - ) - report.append("") - report.append( - "This separation is the foundation for: (1) extracting safe reusable scripts, and (2) extracting human-only replies as training data for a RAG or fine-tune." 
- ) + if templates_total is not None and templates_repeat is not None: + report.append(f"- Total outgoing templates detected: **{templates_total:,}**") + report.append(f"- High-frequency repeat templates: **{templates_repeat:,}**") + if rescue_count is not None: + report.append(f"- “Rescue” events detected: **{rescue_count:,}**") + if pairs_count is not None: + report.append(f"- Training pairs (user → reply) available: **{pairs_count:,}**") + if templates_total is None and rescue_count is None and pairs_count is None: + report.append("- (No additional artifacts were found next to `summary.json`.)") report.append("") - report.append("## 5) Top Detected Script Templates (Canonicalized)") + report.append("## What You Do Not Need to Know") report.append("") - if top_bot: - for i, t in enumerate(top_bot[:10], 1): - canon = (t.get("canonical") or "").strip() - count = int(t.get("count") or 0) - report.append(f"- BOT #{i}: sent **{count}**× — `{canon[:160]}`") - else: - report.append("- (No high-frequency bot templates detected with current thresholds.)") - report.append("") - - report.append("## 6) Human Reply Library (Rare/Manual Examples, Canonicalized)") - report.append("") - if top_manual: - for i, t in enumerate(top_manual[:10], 1): - canon = (t.get("canonical") or "").strip() - count = int(t.get("count") or 0) - report.append(f"- MANUAL-ish #{i}: seen **{count}**× — `{canon[:160]}`") - else: - report.append("- (No low-frequency manual templates included in the cached top list.)") - report.append("") - - report.append("## 7) Bot Template Performance (Reply/Conversion Heuristics)") - report.append("") - report.append("These come from `bot_performance_audit.csv` and are computed per canonical bot template.") - report.append("") - if top_audit: - report.append("### Most-used bot templates (by volume)") - for r in top_audit[:8]: - report.append( - f"- sent={r.get('sent')} reply_rate={r.get('reply_rate')} intent_rate={r.get('conversion_intent_rate')} 
confirmed_rate={r.get('conversion_confirmed_rate')} — `{(r.get('canonical_template') or '')[:140]}`" - ) - report.append("") - if best_reply: - report.append("### Best reply-rate bot templates") - for r in best_reply[:8]: - report.append(f"- reply_rate={r.get('reply_rate')} sent={r.get('sent')} — `{(r.get('canonical_template') or '')[:140]}`") - report.append("") - if worst_reply: - report.append("### Worst reply-rate bot templates") - for r in worst_reply[:8]: - report.append(f"- reply_rate={r.get('reply_rate')} sent={r.get('sent')} — `{(r.get('canonical_template') or '')[:140]}`") - report.append("") - - report.append("## 8) Objections → Best Sergio Replies (Playbook)") - report.append("") - if objection_blocks: - report.extend(objection_blocks) - else: - report.append("- No objection handlers detected with current keyword rules.") - report.append("") - - report.append("## 9) Rescue / Save Logic (Human Intervention After Silence/Negativity)") - report.append("") - report.append(f"- Rescue events detected (heuristic): **{rescue_count:,}**") - report.append( - "A “rescue” is when a manual/hybrid owner message follows either (a) a user negative signal, or (b) >24h silence after a bot message, and the thread later shows a confirmed conversion signal." - ) - report.append("") - - report.append("## 10) Product / Offer Evolution (Eras)") - report.append("") - report.append( - "This is inferred from mentions of pricing/currency + offer terms (e.g., call/audit/coaching) and summarized quarterly." 
- ) - report.append("") - if era_offer_terms: - report.append("Recent quarters (top extracted offer signals):") - for line in era_offer_terms: - report.append(f"- {line}") - else: - report.append("- No offer signals detected in the most recent quarters with current extraction rules.") - report.append("") - - report.append("## 11) Charts") - report.append("") - report.append(f"- Bot fatigue (weekly reply rate to the dominant bot script): `{inp.fatigue_png}`") - report.append(f"- Editorial timeline (top bot scripts vs conversions): `{inp.editorial_png}`") - report.append("") - - report.append("## 12) What To Build From This (Agent Requirements)") - report.append("") - report.append("### Core behavior") - report.append("- Start with top bot templates for predictable openers and FAQ-style flows.") - report.append("- Switch to Sergio-style manual patterns on objections, negotiation, or when conversation stalls.") - report.append("- Use a rescue cadence (time-based triggers) after silence.") - report.append("") - report.append("### Data products to drive the agent") - report.append(f"- Training pairs (manual-only, converted threads): `{inp.training_pairs}` (rows: ~{pairs_count:,})") - report.append(f"- Objection handlers: `{inp.objections}`") - report.append(f"- Rescue playbook: `{inp.rescue}`") - report.append(f"- Script templates + editorial drift: `{inp.templates}`") - report.append("") - report.append("### Safety boundaries (recommended)") - report.append("- Never request or store passwords/2FA codes.") - report.append("- Avoid medical/legal/financial advice; redirect to a call or a human.") - report.append("- If user asks to move off-platform, follow Sergio’s historical policy and business rules.") - report.append("") - - report.append("## 13) What We Do NOT Need To Know (Ignore / Do Not Store)") - report.append("") - report.append("- Exact client identities (names, handles, phone numbers, emails) unless required for operational routing.") - report.append("- Media 
attachments (photos/videos/audio) for persona cloning; they add storage cost and privacy risk.") - report.append("- Full verbatim message dumps for every thread; for RAG you only need high-quality pairs and playbook snippets.") - report.append("- Individual one-off edge cases that never repeat (unless they represent a safety boundary).") - report.append("- Internal Meta export folder structure details beyond `messages/inbox/**/message*.json`.") - report.append("") - - report.append("## 14) Caveats / Gaps") - report.append("") - report.append("- The export does not reliably label ManyChat vs Human; bot/human is inferred by repetition and similarity.") - report.append("- Conversion is heuristic; integrate Stripe/Calendly/CRM events if you want ground-truth attribution.") - report.append("- Language detection is heuristic; improve it if you need precise bilingual routing.") + report.append("Do not store or copy these into an automation system unless you have a clear operational reason:") + report.append("- Names, handles, phone numbers, emails.") + report.append("- Full conversation transcripts for every thread.") + report.append("- Photos, videos, audio, and other attachments.") + report.append("- One-off edge cases that never repeat.") report.append("") out_path.parent.mkdir(parents=True, exist_ok=True) @@ -323,19 +147,20 @@ def generate_report(*, analysis_dir: Path, out_path: Path) -> Path: def main(argv: list[str] | None = None) -> int: - ap = argparse.ArgumentParser(description="Generate a human-readable English report from analyze_instagram_export outputs.") - ap.add_argument("--analysis-dir", required=True, help="directory produced by analyze_instagram_export (contains summary.json)") - ap.add_argument("--out", default=None, help="output markdown path (default: /dm_history_report_en.md)") + ap = argparse.ArgumentParser(description="Generate a short, safe DM history report from an analysis directory.") + ap.add_argument("--analysis-dir", required=True, 
help="analyze_instagram_export output directory") + ap.add_argument("--out", default=None, help="output markdown path (default: dm_history_report_en.md in CWD)") args = ap.parse_args(argv) analysis_dir = Path(args.analysis_dir) - out_path = Path(args.out) if args.out else (analysis_dir / "dm_history_report_en.md") + out_path = Path(args.out) if args.out else (Path.cwd() / "dm_history_report_en.md") + try: p = generate_report(analysis_dir=analysis_dir, out_path=out_path) print(json.dumps({"ok": True, "out": str(p)}, ensure_ascii=False)) return 0 except FileNotFoundError as e: - print(f"Missing required input: {e}", file=os.sys.stderr) + print(f"Missing analysis input: {e}", file=os.sys.stderr) return 2 except Exception as e: print(f"Report generation failed: {e}", file=os.sys.stderr) diff --git a/sergio_instagram_messaging/generate_dm_report_detailed.py b/sergio_instagram_messaging/generate_dm_report_detailed.py index 5feef95..b1653ba 100644 --- a/sergio_instagram_messaging/generate_dm_report_detailed.py +++ b/sergio_instagram_messaging/generate_dm_report_detailed.py @@ -13,7 +13,7 @@ from typing import Any, Iterable, Literal from .analyze_instagram_export import canonicalize_text -DEFAULT_LOCAL_TZ_NAME = "Europe/Brussels" +DEFAULT_LOCAL_TZ_NAME = "Europe/Paris" def _safe_chmod_600(path: Path) -> None: @@ -418,11 +418,11 @@ def _question_theme(text: str) -> str | None: toks = s_compact.split() if len(toks) == 1: w = toks[0] - if w in {"book", "ebook", "libro", "pdf"}: + if w in {"book", "ebook", "libro", "pdf", "livre", "llibre"}: return "Just one word: book" - if w in {"link", "enlace"}: + if w in {"link", "enlace", "lien", "enllac", "enllaç"}: return "Just one word: link" - if w in {"price", "precio", "cost"}: + if w in {"price", "precio", "cost", "prix", "preu"}: return "Just one word: price" # "I tried, but it didn't arrive / it doesn't work" @@ -499,15 +499,48 @@ def _question_theme(text: str) -> str | None: return "Where are you based?" 
# Price / cost - if any(k in s for k in ("price", "cost", "how much", "$", "€", "usd", "eur", "precio", "cuanto", "cuánto", "caro")): + if any( + k in s + for k in ( + "price", + "cost", + "how much", + "$", + "€", + "usd", + "eur", + "precio", + "cuanto", + "cuánto", + "caro", + "prix", + "preu", + ) + ): return "What does it cost?" # Link / payment link - if any(k in s for k in ("link", "send the link", "send me the link", "where is the link", "enlace", "stripe", "paypal", "checkout", "invoice")): + if any( + k in s + for k in ( + "link", + "send the link", + "send me the link", + "where is the link", + "enlace", + "lien", + "enllaç", + "enllac", + "stripe", + "paypal", + "checkout", + "invoice", + ) + ): return "Can you send the link?" # Book / ebook / pdf - if any(k in s for k in ("book", "ebook", "e-book", "pdf", "libro")): + if any(k in s for k in ("book", "ebook", "e-book", "pdf", "libro", "livre", "llibre")): return "Where do I get the book?" # Call / schedule @@ -533,7 +566,7 @@ def _question_theme(text: str) -> str | None: return "How do I book a call?" # Video - if any(k in s for k in ("video", "vídeo", "youtube")): + if any(k in s for k in ("video", "vídeo", "vidéo", "youtube")): return "Can you send the video?" # Steps / what next @@ -541,11 +574,45 @@ def _question_theme(text: str) -> str | None: return "What are the steps?" # How it works / details - if any(k in s for k in ("how does", "how it works", "how does it work", "how does this work", "como funciona", "cómo funciona", "more info", "details", "explain")): + if any( + k in s + for k in ( + "how does", + "how it works", + "how does it work", + "how does this work", + "como funciona", + "cómo funciona", + "more info", + "details", + "explain", + "comment ça marche", + "ça marche", + "com funciona", + ) + ): return "How does it work?" 
# What you do / what is this - if any(k in s for k in ("what is this", "what do you do", "what is it", "what do you offer", "service", "services", "que es", "qué es", "que haces", "qué haces", "de que va", "de qué va")): + if any( + k in s + for k in ( + "what is this", + "what do you do", + "what is it", + "what do you offer", + "service", + "services", + "que es", + "qué es", + "que haces", + "qué haces", + "de que va", + "de qué va", + "c'est quoi", + "cest quoi", + ) + ): return "What is this?" # Trust / legitimacy @@ -597,6 +664,8 @@ def _offer_terms(text: str) -> set[str]: ("ebook", "Ebook"), ("e-book", "Ebook"), ("libro", "Book"), + ("livre", "Book"), + ("llibre", "Book"), ("pdf", "PDF"), ("call", "Call"), ("llamada", "Call"), @@ -935,20 +1004,17 @@ def generate_report( now = datetime.now(timezone.utc).date().isoformat() report: list[str] = [] - report.append("# Socialmediatorr Instagram DM History : Plain-English Deep Report") + report.append("# Instagram DM History — Plain-English Deep Report") report.append("") - report.append("## DM History Deep Report") + report.append("## What This Is") report.append("") - report.append(f"**Subject:** Instagram direct messages for `@socialmediatorr`") - report.append("**Version:** v1.0 (STYLE BIBLE EN 3.0GM)") + report.append(f"**Inbox:** `@socialmediatorr`") report.append(f"**Date:** {now}") - report.append("**Status:** REVIEW REQUIRED") - report.append("**Citation:** `if://report/socialmediatorr/instagram/dm-history/`") - report.append("**Author:** Danny Stocker | InfraFabric Research") + report.append("**Time zone used:** CET") report.append("") report.append("### How This Report Was Made") report.append("") - report.append("> This is an automated count of patterns. It is not a therapy note and it is not a sales ledger.") + report.append("> This is a count of patterns. 
It is not a therapy note and it is not a sales ledger.") report.append("") report.append( "This document was generated by reading an Instagram data export and counting repeat patterns over time. " @@ -957,9 +1023,9 @@ def generate_report( report.append("") report.append("---") report.append("") - report.append(f"**Context:** This inbox contains a high-volume message-and-reply system over {window_days} days.") + report.append(f"**Context:** This inbox contains message history over {window_days} days.") report.append("") - report.append("> Your messaging system is working as a volume engine. The weak point is consistency at the moments where people ask to buy or book.") + report.append("> The system works at scale. The weak point is the “next step” moments: when people ask what to do, what it costs, or where to get it.") report.append("") report.append( "The purpose of this report is practical: define what to keep, what to remove, and what to automate safely—without damaging trust." diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 0000000..3cad8d2 --- /dev/null +++ b/tools/README.md @@ -0,0 +1,19 @@ +# Tools + +## Mermaid checks (Markdown diagrams) + +This repo uses Mermaid diagrams in Markdown reports. + +Local lint (partial): + +- `npm install` +- `npm run verify:mermaid` + +Notes: +- This check validates diagram types supported by `@mermaid-js/parser` (for example: `pie`). +- Some diagram types (for example: `flowchart`) are not supported by that parser yet and will be reported as `skipped`. + +Full validation (recommended): + +- Use Forgejo’s built-in PDF export for the report file. If the PDF export succeeds, the diagrams compiled successfully. 
/**
 * tools/verify_mermaid.mjs
 *
 * Lints the Mermaid diagrams embedded in Markdown files.
 *
 * Usage: node tools/verify_mermaid.mjs [path ...]
 *
 * Each path may be a file or a directory (walked recursively). With no
 * arguments the `reports` directory is scanned. Only diagram types that
 * `@mermaid-js/parser` understands are parsed; every other fenced mermaid
 * block is counted as `skipped`.
 *
 * Exit code: 0 when nothing failed, 1 when a diagram failed to parse or a
 * path could not be scanned/read.
 */
import fs from "node:fs";
import path from "node:path";
import process from "node:process";

import { parse } from "@mermaid-js/parser";

/**
 * Diagram keywords that are handed to `@mermaid-js/parser`.
 * A block is treated as one of these when its first meaningful line starts
 * with the keyword.
 */
const PARSEABLE_DIAGRAM_TYPES = Object.freeze([
  "pie",
  "gitGraph",
  "architecture",
  "packet",
  "info",
  "radar",
  "treemap",
]);

/**
 * @param {string} filePath
 * @returns {boolean} true when the path ends in `.md` or `.markdown` (case-insensitive).
 */
function isMarkdownFile(filePath) {
  const lower = filePath.toLowerCase();
  return lower.endsWith(".md") || lower.endsWith(".markdown");
}

/**
 * Recursively yields every regular file at or below `rootPath`.
 * If `rootPath` is itself a file it is yielded as-is.
 *
 * @param {string} rootPath
 * @yields {string} file path
 * @throws {Error} propagates `fs` errors (for example when `rootPath` does not exist).
 */
function* walkFiles(rootPath) {
  if (fs.statSync(rootPath).isFile()) {
    yield rootPath;
    return;
  }

  for (const entry of fs.readdirSync(rootPath, { withFileTypes: true })) {
    const fullPath = path.join(rootPath, entry.name);
    if (entry.isDirectory()) {
      yield* walkFiles(fullPath);
    } else if (entry.isFile()) {
      yield fullPath;
    }
  }
}

/**
 * Returns the raw contents of every fenced mermaid block found in the text.
 *
 * @param {string} markdownText
 * @returns {string[]} block bodies, without the surrounding fences
 */
function extractMermaidBlocks(markdownText) {
  const fence = /```mermaid\s*([\s\S]*?)```/g;
  return Array.from(markdownText.matchAll(fence), (match) => match[1] ?? "");
}

/**
 * Determines which parser grammar applies to a diagram.
 *
 * Blank lines and `%%` comment lines are ignored; the first remaining line
 * decides the type.
 *
 * @param {string} code diagram source
 * @returns {string | null} the diagram type, or null when the diagram is
 *   empty or of a type the parser does not support yet
 *   (example: flowchart/sequence/class).
 */
function detectDiagramType(code) {
  const head = String(code || "")
    .replace(/\r\n?/g, "\n")
    .split("\n")
    .map((line) => line.trim())
    .find((line) => line && !line.startsWith("%%"));

  if (!head) return null;
  return PARSEABLE_DIAGRAM_TYPES.find((type) => head.startsWith(type)) ?? null;
}

/**
 * Best-effort extraction of a printable message from a thrown value.
 *
 * @param {unknown} err
 * @returns {string}
 */
function describeError(err) {
  return err && typeof err === "object" && "message" in err ? String(err.message) : String(err);
}

/**
 * Scans the requested roots, parses every supported diagram, and prints a
 * JSON summary (stdout on success, stderr on failure).
 *
 * Path problems (missing root, unreadable directory or file) are reported
 * and counted instead of aborting the run, so the remaining roots are still
 * checked and the summary is always printed.
 *
 * @returns {Promise<number>} process exit code: 0 = ok, 1 = at least one failure
 */
async function main() {
  const args = process.argv.slice(2);
  const roots = args.length > 0 ? args : ["reports"];

  let total = 0;
  let failures = 0;
  let skipped = 0;
  let pathErrors = 0;

  for (const root of roots) {
    // Resolve the file list up front so a bad root is reported once, cleanly.
    let markdownFiles;
    try {
      markdownFiles = [...walkFiles(root)].filter(isMarkdownFile);
    } catch (err) {
      pathErrors += 1;
      console.error(`[mermaid] ${root}: cannot scan path: ${describeError(err)}`);
      continue;
    }

    for (const filePath of markdownFiles) {
      let text;
      try {
        text = fs.readFileSync(filePath, "utf8");
      } catch (err) {
        pathErrors += 1;
        console.error(`[mermaid] ${filePath}: cannot read file: ${describeError(err)}`);
        continue;
      }

      for (const [index, rawBlock] of extractMermaidBlocks(text).entries()) {
        const code = rawBlock.trim();
        total += 1;

        const diagramType = detectDiagramType(code);
        if (!diagramType) {
          skipped += 1;
          continue;
        }

        try {
          await parse(diagramType, code);
        } catch (err) {
          failures += 1;
          console.error(
            `[mermaid] ${filePath} block=${index + 1} type=${diagramType}: ${describeError(err)}`,
          );
        }
      }
    }
  }

  if (failures === 0 && pathErrors === 0) {
    console.log(JSON.stringify({ ok: true, diagrams: total, skipped }, null, 2));
    return 0;
  }

  console.error(
    JSON.stringify({ ok: false, diagrams: total, failures, skipped, pathErrors }, null, 2),
  );
  return 1;
}

process.exitCode = await main();