Sanitize DM reports and add Mermaid tooling
This commit is contained in:
parent
a6222083e6
commit
a140b3787a
10 changed files with 516 additions and 427 deletions
5
.gitignore
vendored
5
.gitignore
vendored
|
|
@ -7,4 +7,7 @@ venv/
|
|||
.vscode/
|
||||
/dist/
|
||||
/build/
|
||||
|
||||
node_modules/
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
|
|
|
|||
|
|
@ -103,6 +103,12 @@ This produces the “Sergio persona” artifacts needed for the DM agent:
|
|||
|
||||
Outputs are written with mode `600` and may contain sensitive DM content. Keep them out of git.
|
||||
|
||||
This repo includes **sanitized** example reports (no verbatim client DMs) under:
|
||||
|
||||
- `reports/socialmediatorr/`
|
||||
|
||||
Raw analysis artifacts (e.g., training pairs, rescued threads, template caches) should remain in a private working directory such as `/root/tmp/` and should not be committed.
|
||||
|
||||
### Analyze a raw Instagram export folder (recommended)
|
||||
|
||||
Optional: index first (lets you filter recency without scanning every thread):
|
||||
|
|
|
|||
174
package-lock.json
generated
Normal file
174
package-lock.json
generated
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
{
|
||||
"name": "emo-social-insta-dm-agent-tools",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "emo-social-insta-dm-agent-tools",
|
||||
"devDependencies": {
|
||||
"@mermaid-js/parser": "^0.6.3"
|
||||
}
|
||||
},
|
||||
"node_modules/@chevrotain/cst-dts-gen": {
|
||||
"version": "11.0.3",
|
||||
"resolved": "https://registry.npmjs.org/@chevrotain/cst-dts-gen/-/cst-dts-gen-11.0.3.tgz",
|
||||
"integrity": "sha512-BvIKpRLeS/8UbfxXxgC33xOumsacaeCKAjAeLyOn7Pcp95HiRbrpl14S+9vaZLolnbssPIUuiUd8IvgkRyt6NQ==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@chevrotain/gast": "11.0.3",
|
||||
"@chevrotain/types": "11.0.3",
|
||||
"lodash-es": "4.17.21"
|
||||
}
|
||||
},
|
||||
"node_modules/@chevrotain/gast": {
|
||||
"version": "11.0.3",
|
||||
"resolved": "https://registry.npmjs.org/@chevrotain/gast/-/gast-11.0.3.tgz",
|
||||
"integrity": "sha512-+qNfcoNk70PyS/uxmj3li5NiECO+2YKZZQMbmjTqRI3Qchu8Hig/Q9vgkHpI3alNjr7M+a2St5pw5w5F6NL5/Q==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@chevrotain/types": "11.0.3",
|
||||
"lodash-es": "4.17.21"
|
||||
}
|
||||
},
|
||||
"node_modules/@chevrotain/regexp-to-ast": {
|
||||
"version": "11.0.3",
|
||||
"resolved": "https://registry.npmjs.org/@chevrotain/regexp-to-ast/-/regexp-to-ast-11.0.3.tgz",
|
||||
"integrity": "sha512-1fMHaBZxLFvWI067AVbGJav1eRY7N8DDvYCTwGBiE/ytKBgP8azTdgyrKyWZ9Mfh09eHWb5PgTSO8wi7U824RA==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/@chevrotain/types": {
|
||||
"version": "11.0.3",
|
||||
"resolved": "https://registry.npmjs.org/@chevrotain/types/-/types-11.0.3.tgz",
|
||||
"integrity": "sha512-gsiM3G8b58kZC2HaWR50gu6Y1440cHiJ+i3JUvcp/35JchYejb2+5MVeJK0iKThYpAa/P2PYFV4hoi44HD+aHQ==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/@chevrotain/utils": {
|
||||
"version": "11.0.3",
|
||||
"resolved": "https://registry.npmjs.org/@chevrotain/utils/-/utils-11.0.3.tgz",
|
||||
"integrity": "sha512-YslZMgtJUyuMbZ+aKvfF3x1f5liK4mWNxghFRv7jqRR9C3R3fAOGTTKvxXDa2Y1s9zSbcpuO0cAxDYsc9SrXoQ==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/@mermaid-js/parser": {
|
||||
"version": "0.6.3",
|
||||
"resolved": "https://registry.npmjs.org/@mermaid-js/parser/-/parser-0.6.3.tgz",
|
||||
"integrity": "sha512-lnjOhe7zyHjc+If7yT4zoedx2vo4sHaTmtkl1+or8BRTnCtDmcTpAjpzDSfCZrshM5bCoz0GyidzadJAH1xobA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"langium": "3.3.1"
|
||||
}
|
||||
},
|
||||
"node_modules/chevrotain": {
|
||||
"version": "11.0.3",
|
||||
"resolved": "https://registry.npmjs.org/chevrotain/-/chevrotain-11.0.3.tgz",
|
||||
"integrity": "sha512-ci2iJH6LeIkvP9eJW6gpueU8cnZhv85ELY8w8WiFtNjMHA5ad6pQLaJo9mEly/9qUyCpvqX8/POVUTf18/HFdw==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@chevrotain/cst-dts-gen": "11.0.3",
|
||||
"@chevrotain/gast": "11.0.3",
|
||||
"@chevrotain/regexp-to-ast": "11.0.3",
|
||||
"@chevrotain/types": "11.0.3",
|
||||
"@chevrotain/utils": "11.0.3",
|
||||
"lodash-es": "4.17.21"
|
||||
}
|
||||
},
|
||||
"node_modules/chevrotain-allstar": {
|
||||
"version": "0.3.1",
|
||||
"resolved": "https://registry.npmjs.org/chevrotain-allstar/-/chevrotain-allstar-0.3.1.tgz",
|
||||
"integrity": "sha512-b7g+y9A0v4mxCW1qUhf3BSVPg+/NvGErk/dOkrDaHA0nQIQGAtrOjlX//9OQtRlSCy+x9rfB5N8yC71lH1nvMw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"lodash-es": "^4.17.21"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"chevrotain": "^11.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/langium": {
|
||||
"version": "3.3.1",
|
||||
"resolved": "https://registry.npmjs.org/langium/-/langium-3.3.1.tgz",
|
||||
"integrity": "sha512-QJv/h939gDpvT+9SiLVlY7tZC3xB2qK57v0J04Sh9wpMb6MP1q8gB21L3WIo8T5P1MSMg3Ep14L7KkDCFG3y4w==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"chevrotain": "~11.0.3",
|
||||
"chevrotain-allstar": "~0.3.0",
|
||||
"vscode-languageserver": "~9.0.1",
|
||||
"vscode-languageserver-textdocument": "~1.0.11",
|
||||
"vscode-uri": "~3.0.8"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/lodash-es": {
|
||||
"version": "4.17.21",
|
||||
"resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.17.21.tgz",
|
||||
"integrity": "sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/vscode-jsonrpc": {
|
||||
"version": "8.2.0",
|
||||
"resolved": "https://registry.npmjs.org/vscode-jsonrpc/-/vscode-jsonrpc-8.2.0.tgz",
|
||||
"integrity": "sha512-C+r0eKJUIfiDIfwJhria30+TYWPtuHJXHtI7J0YlOmKAo7ogxP20T0zxB7HZQIFhIyvoBPwWskjxrvAtfjyZfA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=14.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/vscode-languageserver": {
|
||||
"version": "9.0.1",
|
||||
"resolved": "https://registry.npmjs.org/vscode-languageserver/-/vscode-languageserver-9.0.1.tgz",
|
||||
"integrity": "sha512-woByF3PDpkHFUreUa7Hos7+pUWdeWMXRd26+ZX2A8cFx6v/JPTtd4/uN0/jB6XQHYaOlHbio03NTHCqrgG5n7g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"vscode-languageserver-protocol": "3.17.5"
|
||||
},
|
||||
"bin": {
|
||||
"installServerIntoExtension": "bin/installServerIntoExtension"
|
||||
}
|
||||
},
|
||||
"node_modules/vscode-languageserver-protocol": {
|
||||
"version": "3.17.5",
|
||||
"resolved": "https://registry.npmjs.org/vscode-languageserver-protocol/-/vscode-languageserver-protocol-3.17.5.tgz",
|
||||
"integrity": "sha512-mb1bvRJN8SVznADSGWM9u/b07H7Ecg0I3OgXDuLdn307rl/J3A9YD6/eYOssqhecL27hK1IPZAsaqh00i/Jljg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"vscode-jsonrpc": "8.2.0",
|
||||
"vscode-languageserver-types": "3.17.5"
|
||||
}
|
||||
},
|
||||
"node_modules/vscode-languageserver-textdocument": {
|
||||
"version": "1.0.12",
|
||||
"resolved": "https://registry.npmjs.org/vscode-languageserver-textdocument/-/vscode-languageserver-textdocument-1.0.12.tgz",
|
||||
"integrity": "sha512-cxWNPesCnQCcMPeenjKKsOCKQZ/L6Tv19DTRIGuLWe32lyzWhihGVJ/rcckZXJxfdKCFvRLS3fpBIsV/ZGX4zA==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/vscode-languageserver-types": {
|
||||
"version": "3.17.5",
|
||||
"resolved": "https://registry.npmjs.org/vscode-languageserver-types/-/vscode-languageserver-types-3.17.5.tgz",
|
||||
"integrity": "sha512-Ld1VelNuX9pdF39h2Hgaeb5hEZM2Z3jUrrMgWQAu82jMtZp7p3vJT3BzToKtZI7NgQssZje5o0zryOrhQvzQAg==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/vscode-uri": {
|
||||
"version": "3.0.8",
|
||||
"resolved": "https://registry.npmjs.org/vscode-uri/-/vscode-uri-3.0.8.tgz",
|
||||
"integrity": "sha512-AyFQ0EVmsOZOlAnxoFOGOq1SQDWAB7C6aqMGS23svWAllfOaxbuFvcT8D1i8z3Gyn8fraVeZNNmN6e9bxxXkKw==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
}
|
||||
}
|
||||
}
|
||||
11
package.json
Normal file
11
package.json
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
{
|
||||
"name": "emo-social-insta-dm-agent-tools",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"devDependencies": {
|
||||
"@mermaid-js/parser": "^0.6.3"
|
||||
},
|
||||
"scripts": {
|
||||
"verify:mermaid": "node tools/verify_mermaid.mjs"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,163 +1,42 @@
|
|||
# Socialmediatorr Instagram DM History — Human Readable Report (English)
|
||||
# Instagram DM History — Short Report (English)
|
||||
|
||||
- Generated: `2025-12-24T02:28:34+00:00`
|
||||
- Owner name used: `Sergio de Vocht`
|
||||
- Inbox: `@socialmediatorr`
|
||||
|
||||
## 1) What This Dataset Represents
|
||||
## What This Is
|
||||
|
||||
This is an all-time audit of Instagram DM conversations for `@socialmediatorr`, focused on extracting repeatable sales + support behavior so an AI agent can reply in Sergio’s style.
|
||||
The analysis treats the account as a hybrid system: frequent repeated templates (likely automation/scripts) plus lower-frequency custom replies (human Sergio).
|
||||
This is a short, plain-English summary of the DM history scan.
|
||||
It avoids quoting private messages and it avoids storing personal identities.
|
||||
|
||||
## 2) High-Level Metrics (All-Time)
|
||||
## Key Numbers
|
||||
|
||||
- Conversations analyzed: **10,061**
|
||||
- Bot-only conversations: **1,883** (18.7%)
|
||||
- Human-intervened conversations: **8,153** (81.0%)
|
||||
- Conversion (intent signals): **1,923** (19.1%)
|
||||
- Conversion (confirmed signals): **55** (0.5%)
|
||||
- Conversations that stayed template-only: **1,883** (18.7%)
|
||||
- Conversations that included custom replies: **8,153** (81.0%)
|
||||
- Buying/booking signals (weak): **1,923** (19.1%)
|
||||
- Buying/booking signals (strong): **55** (0.5%)
|
||||
|
||||
Notes on conversion: this uses heuristics (keywords + payment/link mentions). It is directionally useful for ranking scripts, but it is not a ground-truth revenue ledger.
|
||||
Buying/booking signals are detected from text patterns (they are not a payment ledger).
|
||||
|
||||
## 3) Sergio Persona (From Manual/Hybrid Replies)
|
||||
## What You Need to Know
|
||||
|
||||
- Typical reply length: median **60.0** chars (p90 **67.0**)
|
||||
- Questions: **2.4%** | Exclamations: **1.7%** | Emoji: **0.0%**
|
||||
- Language guess (manual replies): en=8043, es=423, unknown=224
|
||||
The fastest improvements come from standardizing answers to repeated questions and sending them in the right time blocks.
|
||||
For the full deep report (CET timing, day-of-week patterns, Top 20 questions, and concrete actions), read:
|
||||
|
||||
Practical implication for an agent: short, direct replies; minimal punctuation; bilingual capability; low/no emoji usage.
|
||||
- `reports/socialmediatorr/dm_history_report_en_detailed.md`
|
||||
|
||||
## 4) Bot vs Human Segmentation (What It Means)
|
||||
## Useful Inventory (Safe Counts Only)
|
||||
|
||||
- **[BOT]** = outgoing message template repeated frequently (>= configured threshold).
|
||||
- **[MANUAL]** = outgoing message that is rare/unique (<= configured threshold).
|
||||
- **[HYBRID]** = messages that look like a bot template but with manual edits (prefix match/similarity).
|
||||
- Total outgoing templates detected: **8,550**
|
||||
- High-frequency repeat templates: **24**
|
||||
- “Rescue” events detected: **7**
|
||||
- Training pairs (user → reply) available: **524**
|
||||
|
||||
This separation is the foundation for: (1) extracting safe reusable scripts, and (2) extracting human-only replies as training data for a RAG or fine-tune.
|
||||
## What You Do Not Need to Know
|
||||
|
||||
## 5) Top Detected Script Templates (Canonicalized)
|
||||
|
||||
- BOT #1: sent **2495**× — `crees que es necesario hoy en día llevar a cabo este desarrollo colectivo`
|
||||
- BOT #2: sent **2483**× — `perfecto aquã no hablamos de â mejorar solo a uno mismoâ sino de algo mucho mã s profundo estar bien con los demã s para poder estar bien contigo mism`
|
||||
- BOT #3: sent **2483**× — `te lo dejo por aquí dame un minuto`
|
||||
- BOT #4: sent **2483**× — `me gustaría saber tu opinión`
|
||||
- BOT #5: sent **1878**× — `me alegro de que quieras seguir aprendiendo ð ª te dejo por aquã el ebook â <NUM> conceptos de desarrollo personal que te estã n quitando paz`
|
||||
- BOT #6: sent **1878**× — `no es para que lo leas en modo teorã a sino para que puedas detectar ideas que llevas tiempo aplicando y que sin darte cuenta estã n influyendo en tus relacione`
|
||||
- BOT #7: sent **706**× — `gracias por ese feedback ð`
|
||||
- BOT #8: sent **706**× — `como agradecimiento por seguirme quiero regalarte un video exclusivo que te ayude a empezar este cambio dime â dã³nde sientes que hay mã s conflicto ãºltimament`
|
||||
- BOT #9: sent **680**× — `you sent a private reply to a comment on your instagram post`
|
||||
- BOT #10: sent **469**× — `por cierto`
|
||||
|
||||
## 6) Human Reply Library (Rare/Manual Examples, Canonicalized)
|
||||
|
||||
- MANUAL-ish #1: seen **10**× — `quã bonito leer eso a veces entender las palabras abre puertas nuevas â sientes que en tu entorno hay algo que te gustarã a armonizar mã s`
|
||||
- MANUAL-ish #2: seen **7**× — `buenas aquã sergio ð gracias por responder he preparado un video con muchã simo valor para ayudarte a acabar con esos desbordes y vivir en paz solo tienes que c`
|
||||
- MANUAL-ish #3: seen **5**× — `hola buenas como estas ð espero que estã s bien me gustarã a saber que es lo q te ha echo estar aquã y querer saber mã s sobre nuestras formaciã³n`
|
||||
- MANUAL-ish #4: seen **5**× — `y si pudieras resolver esto cã³mo crees que cambiarã a tu forma de relacionarte o sentirte`
|
||||
- MANUAL-ish #5: seen **5**× — `para conocerte un poquito mã s que te gustarã a conseguir con emosocial cual es tu mayor desafã o actualmente dentro de tus relaciones`
|
||||
- MANUAL-ish #6: seen **4**× — `okey te entiendo perfectamente ð segãºn lo que me comentas creo que esta lista de videos de youtube te va a venir genial para empezar a entender las bases del c`
|
||||
- MANUAL-ish #7: seen **4**× — `buenas aquã sergio ð gracias por responder he preparado un video con muchã simo valor para ayudarte a acabar con esos desbordes y vivir en paz solo tienes que c`
|
||||
- MANUAL-ish #8: seen **3**× — `hola buenas como estas espero que bien cuã ntame que te parece el contenido que estamos ofreciendo por whatsapp te leoð ð`
|
||||
|
||||
## 7) Bot Template Performance (Reply/Conversion Heuristics)
|
||||
|
||||
These come from `bot_performance_audit.csv` and are computed per canonical bot template.
|
||||
|
||||
### Most-used bot templates (by volume)
|
||||
- sent=2495 reply_rate=0.376 intent_rate=0.0766 confirmed_rate=0.012 — `crees que es necesario hoy en día llevar a cabo este desarrollo colectivo`
|
||||
- sent=2483 reply_rate=0.0334 intent_rate=0.0769 confirmed_rate=0.0121 — `perfecto aquã no hablamos de â mejorar solo a uno mismoâ sino de algo mucho mã s profundo estar bien con los demã s para poder estar bien co`
|
||||
- sent=2483 reply_rate=0.1188 intent_rate=0.0769 confirmed_rate=0.0121 — `te lo dejo por aquí dame un minuto`
|
||||
- sent=2483 reply_rate=0.0028 intent_rate=0.0769 confirmed_rate=0.0121 — `me gustaría saber tu opinión`
|
||||
- sent=1878 reply_rate=0.0 intent_rate=0.0 confirmed_rate=0.0005 — `me alegro de que quieras seguir aprendiendo ð ª te dejo por aquã el ebook â <NUM> conceptos de desarrollo personal que te estã n quitando pa`
|
||||
- sent=1878 reply_rate=0.1768 intent_rate=0.0 confirmed_rate=0.0005 — `no es para que lo leas en modo teorã a sino para que puedas detectar ideas que llevas tiempo aplicando y que sin darte cuenta estã n influye`
|
||||
- sent=706 reply_rate=0.0042 intent_rate=0.1048 confirmed_rate=0.017 — `gracias por ese feedback ð`
|
||||
- sent=706 reply_rate=0.8187 intent_rate=0.1048 confirmed_rate=0.017 — `como agradecimiento por seguirme quiero regalarte un video exclusivo que te ayude a empezar este cambio dime â dã³nde sientes que hay mã s c`
|
||||
|
||||
### Best reply-rate bot templates
|
||||
- reply_rate=0.8187 sent=706 — `como agradecimiento por seguirme quiero regalarte un video exclusivo que te ayude a empezar este cambio dime â dã³nde sientes que hay mã s c`
|
||||
- reply_rate=0.7143 sent=98 — `pudiste entrar correctamente`
|
||||
- reply_rate=0.7022 sent=178 — `por favor toca una de las siguientes opciones ð`
|
||||
- reply_rate=0.4701 sent=134 — `pudiste verlo`
|
||||
- reply_rate=0.4602 sent=176 — `que te pareció ese diccionario hay alguna palabra que sueles utilizar y no te habías dado cuenta`
|
||||
- reply_rate=0.376 sent=2495 — `crees que es necesario hoy en día llevar a cabo este desarrollo colectivo`
|
||||
- reply_rate=0.3458 sent=240 — `gracias por tu sinceridad ð`
|
||||
- reply_rate=0.3291 sent=158 — `te dejo este video donde explico por quã las relaciones de pareja entran en conflicto aunque haya amor`
|
||||
|
||||
### Worst reply-rate bot templates
|
||||
- reply_rate=0.0 sent=1878 — `me alegro de que quieras seguir aprendiendo ð ª te dejo por aquã el ebook â <NUM> conceptos de desarrollo personal que te estã n quitando pa`
|
||||
- reply_rate=0.0 sent=337 — `enhorabuena por querer dar ese cambio estã s a un paso de transformar tu relaciã³n en solo <NUM> dã as te invito a un taller exclusivo donde`
|
||||
- reply_rate=0.0 sent=158 — `gracias por compartirlo â ï`
|
||||
- reply_rate=0.0 sent=131 — `entiendo perfectamente ð`
|
||||
- reply_rate=0.0 sent=54 — `this account can t receive your message because they don t allow new message requests from everyone`
|
||||
- reply_rate=0.0028 sent=2483 — `me gustaría saber tu opinión`
|
||||
- reply_rate=0.0042 sent=706 — `gracias por ese feedback ð`
|
||||
- reply_rate=0.0334 sent=2483 — `perfecto aquã no hablamos de â mejorar solo a uno mismoâ sino de algo mucho mã s profundo estar bien con los demã s para poder estar bien co`
|
||||
|
||||
## 8) Objections → Best Sergio Replies (Playbook)
|
||||
|
||||
### price
|
||||
- (1) Ey Alex que tal
|
||||
- (1) Qué bonito leer eso. A veces entender las palabras abre puertas nuevas. ¿Sientes que en tu entorno hay algo que te gustaría armonizar más?
|
||||
- (1) Y que es lo que te impide dar ese cambio? Te veo con mucha seguridad
|
||||
### time
|
||||
- (1) Brutal esto que dices
|
||||
- (1) No es una herida ELA! Apego que no te dieron tus padres es solo una parte del espectro, necesitamos validación del mundo y de forma constante, no es una herida del pasado es algo que falta darnos en el presente.
|
||||
- (1) Vaya, suena bastante frustrante el hecho de querer "bajar esa guardia", y sentir que cuando lo haces, todo cambia
|
||||
### trust
|
||||
- (2) Hola Dani, gracias por el mensaje bonito, de verdad. Me alegra mucho saber que el contenido te está ayudando a mirar las cosas desde otro punto de vista
|
||||
- (2) Qué bonito leer eso. A veces entender las palabras abre puertas nuevas. ¿Sientes que en tu entorno hay algo que te gustaría armonizar más?
|
||||
- (2) En la plataforma no sale por ningún lugar, y normalmente siempre llegan 2 emails, 1 de confirmación de pago y otro de bienvenida
|
||||
|
||||
## 9) Rescue / Save Logic (Human Intervention After Silence/Negativity)
|
||||
|
||||
- Rescue events detected (heuristic): **7**
|
||||
A “rescue” is when a manual/hybrid owner message follows either (a) a user negative signal, or (b) >24h silence after a bot message, and the thread later shows a confirmed conversion signal.
|
||||
|
||||
## 10) Product / Offer Evolution (Eras)
|
||||
|
||||
This is inferred from mentions of pricing/currency + offer terms (e.g., call/audit/coaching) and summarized quarterly.
|
||||
|
||||
Recent quarters (top extracted offer signals):
|
||||
- stripe(1)
|
||||
- book(1912); ebook(1912); call(8); calendly(7); coaching(2); stripe(2); pdf(2); paypal(1)
|
||||
|
||||
## 11) Charts
|
||||
|
||||
- Bot fatigue (weekly reply rate to the dominant bot script): `bot_fatigue_chart.png`
|
||||
|
||||

|
||||
- Editorial timeline (top bot scripts vs conversions): `editorial_timeline.png`
|
||||
|
||||

|
||||
|
||||
## 12) What To Build From This (Agent Requirements)
|
||||
|
||||
### Core behavior
|
||||
- Start with top bot templates for predictable openers and FAQ-style flows.
|
||||
- Switch to Sergio-style manual patterns on objections, negotiation, or when conversation stalls.
|
||||
- Use a rescue cadence (time-based triggers) after silence.
|
||||
|
||||
### Data products to drive the agent
|
||||
- Training pairs (manual-only, converted threads): `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/training_pairs.jsonl` (rows: ~524)
|
||||
- Objection handlers: `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/objection_handlers.json`
|
||||
- Rescue playbook: `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/rescue_playbook.json`
|
||||
- Script templates + editorial drift: `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/top_outgoing_templates.json`
|
||||
|
||||
### Safety boundaries (recommended)
|
||||
- Never request or store passwords/2FA codes.
|
||||
- Avoid medical/legal/financial advice; redirect to a call or a human.
|
||||
- If user asks to move off-platform, follow Sergio’s historical policy and business rules.
|
||||
|
||||
## 13) What We Do NOT Need To Know (Ignore / Do Not Store)
|
||||
|
||||
- Exact client identities (names, handles, phone numbers, emails) unless required for operational routing.
|
||||
- Media attachments (photos/videos/audio) for persona cloning; they add storage cost and privacy risk.
|
||||
- Full verbatim message dumps for every thread; for RAG you only need high-quality pairs and playbook snippets.
|
||||
- Individual one-off edge cases that never repeat (unless they represent a safety boundary).
|
||||
- Internal Meta export folder structure details beyond `messages/inbox/**/message*.json`.
|
||||
|
||||
## 14) Caveats / Gaps
|
||||
|
||||
- The export does not reliably label ManyChat vs Human; bot/human is inferred by repetition and similarity.
|
||||
- Conversion is heuristic; integrate Stripe/Calendly/CRM events if you want ground-truth attribution.
|
||||
- Language detection is heuristic; improve it if you need precise bilingual routing.
|
||||
Do not store or copy these into an automation system unless you have a clear operational reason:
|
||||
- Names, handles, phone numbers, emails.
|
||||
- Full conversation transcripts for every thread.
|
||||
- Photos, videos, audio, and other attachments.
|
||||
- One-off edge cases that never repeat.
|
||||
|
||||
|
|
|
|||
|
|
@ -1,25 +1,22 @@
|
|||
# Socialmediatorr Instagram DM History : Plain-English Deep Report
|
||||
# Instagram DM History — Plain-English Deep Report
|
||||
|
||||
## DM History Deep Report
|
||||
## What This Is
|
||||
|
||||
**Subject:** Instagram direct messages for `@socialmediatorr`
|
||||
**Version:** v1.0 (STYLE BIBLE EN 3.0GM)
|
||||
**Inbox:** `@socialmediatorr`
|
||||
**Date:** 2025-12-24
|
||||
**Status:** REVIEW REQUIRED
|
||||
**Citation:** `if://report/socialmediatorr/instagram/dm-history/`
|
||||
**Author:** Danny Stocker | InfraFabric Research
|
||||
**Time zone used:** CET
|
||||
|
||||
### How This Report Was Made
|
||||
|
||||
> This is an automated count of patterns. It is not a therapy note and it is not a sales ledger.
|
||||
> This is a count of patterns. It is not a therapy note and it is not a sales ledger.
|
||||
|
||||
This document was generated by reading an Instagram data export and counting repeat patterns over time. It avoids quoting private client messages and it avoids storing personal identities.
|
||||
|
||||
---
|
||||
|
||||
**Context:** This inbox contains a high-volume message-and-reply system over 429 days.
|
||||
**Context:** This inbox contains message history over 429 days.
|
||||
|
||||
> Your messaging system is working as a volume engine. The weak point is consistency at the moments where people ask to buy or book.
|
||||
> The system works at scale. The weak point is the “next step” moments: when people ask what to do, what it costs, or where to get it.
|
||||
|
||||
The purpose of this report is practical: define what to keep, what to remove, and what to automate safely—without damaging trust.
|
||||
|
||||
|
|
@ -35,7 +32,7 @@ Across the observed window, you sent a very large number of messages and you rec
|
|||
| Total messages | 54,069 | Instagram export |
|
||||
| Messages you sent | 43,607 | Instagram export |
|
||||
| Messages people sent you | 10,462 | Instagram export |
|
||||
| Messages that look like a question or a request | 2,713 | Instagram export |
|
||||
| Messages that look like a question or a request | 2,715 | Instagram export |
|
||||
| System messages about new followers (auto text in the inbox) | 8,081 | Instagram export |
|
||||
|
||||
### What You Need to Know (In Plain English)
|
||||
|
|
@ -67,7 +64,7 @@ To avoid guesswork, we start with 3-month blocks (a simple way to smooth noise),
|
|||
| 2025 Jan-Mar | 21 | 0 | 0 |
|
||||
| 2025 Apr-Jun | 92 | 97 | 15 |
|
||||
| 2025 Jul-Sep | 623 | 882 | 89 |
|
||||
| 2025 Oct-Dec | 9,712 | 42,628 | 2,609 |
|
||||
| 2025 Oct-Dec | 9,712 | 42,628 | 2,611 |
|
||||
|
||||
Same data as charts:
|
||||
|
||||
|
|
@ -112,8 +109,8 @@ This month-by-month table is the clearest view of how the inbox changed over tim
|
|||
| 2025-08 | 193 | 230 | 28 | 50.0% |
|
||||
| 2025-09 | 284 | 330 | 24 | 20.8% |
|
||||
| 2025-10 | 787 | 1,190 | 64 | 17.2% |
|
||||
| 2025-11 | 854 | 2,194 | 149 | 46.3% |
|
||||
| 2025-12 | 8,071 | 39,244 | 2,396 | 89.6% |
|
||||
| 2025-11 | 854 | 2,194 | 150 | 46.7% |
|
||||
| 2025-12 | 8,071 | 39,244 | 2,397 | 89.7% |
|
||||
|
||||
The busiest month was **2025-12** with **47,315** messages total (87.5% of everything in this export). That single month dominates the shape of the data.
|
||||
|
||||
|
|
@ -126,7 +123,7 @@ Use this to time follow-ups and first messages. Do not spread effort evenly acro
|
|||
| Day of week | Messages from people | Messages you sent | Questions/requests |
|
||||
|---|---:|---:|---:|
|
||||
| Monday | 1,600 | 8,359 | 131 |
|
||||
| Tuesday | 1,939 | 9,654 | 192 |
|
||||
| Tuesday | 1,939 | 9,654 | 194 |
|
||||
| Wednesday | 1,282 | 5,554 | 159 |
|
||||
| Thursday | 2,261 | 6,908 | 1,268 |
|
||||
| Friday | 1,705 | 5,733 | 803 |
|
||||
|
|
@ -184,7 +181,7 @@ One caution: “fast replies” are often repeat messages. This section shows ov
|
|||
| Typical time to reply to questions/requests | 2 seconds | Instagram export |
|
||||
| Slow end for questions/requests (90% are faster) | 4 seconds | Instagram export |
|
||||
| Messages from people answered within 48 hours | 7,467 (71.4%) | Instagram export |
|
||||
| Questions/requests answered within 48 hours | 2,278 (84.0%) | Instagram export |
|
||||
| Questions/requests answered within 48 hours | 2,280 (84.0%) | Instagram export |
|
||||
|
||||
Breakdown by message type (repeat messages vs custom messages):
|
||||
|
||||
|
|
@ -242,11 +239,11 @@ This list is grouped by meaning (not by exact wording). It includes very short r
|
|||
| Rank | Topic (plain English) | Count | Share of all questions/requests |
|
||||
|---:|---|---:|---:|
|
||||
| 1 | Just one word: book | 1,857 | 68.4% |
|
||||
| 2 | What is this? | 206 | 7.6% |
|
||||
| 3 | Can you send the video? | 191 | 7.0% |
|
||||
| 4 | Other question | 120 | 4.4% |
|
||||
| 2 | What is this? | 203 | 7.5% |
|
||||
| 3 | Can you send the video? | 189 | 7.0% |
|
||||
| 4 | Other question | 118 | 4.3% |
|
||||
| 5 | Can you help me? | 74 | 2.7% |
|
||||
| 6 | Can you send the link? | 61 | 2.2% |
|
||||
| 6 | Can you send the link? | 70 | 2.6% |
|
||||
| 7 | What does it cost? | 53 | 2.0% |
|
||||
| 8 | Is this therapy? | 44 | 1.6% |
|
||||
| 9 | Where do I get the book? | 36 | 1.3% |
|
||||
|
|
@ -261,12 +258,12 @@ This list is grouped by meaning (not by exact wording). It includes very short r
|
|||
| 18 | Can I get a refund? | 1 | 0.0% |
|
||||
| 19 | How long does it take? | 1 | 0.0% |
|
||||
|
||||
In plain terms: **1,893** of **2,713** questions/requests are about the book (69.8%).
|
||||
In plain terms: **1,893** of **2,715** questions/requests are about the book (69.7%).
|
||||
|
||||
```mermaid
|
||||
pie title Questions/Requests: Book vs Everything Else
|
||||
"Book" : 1893
|
||||
"Everything else" : 820
|
||||
"Everything else" : 822
|
||||
```
|
||||
|
||||
### Content Patterns (What You Mention When You Sell)
|
||||
|
|
|
|||
|
|
@ -1,10 +1,8 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
import statistics
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
|
@ -21,11 +19,6 @@ def _load_json(path: Path) -> dict[str, Any]:
|
|||
return json.loads(path.read_text(encoding="utf-8", errors="replace"))
|
||||
|
||||
|
||||
def _read_csv(path: Path) -> list[dict[str, str]]:
|
||||
with path.open("r", encoding="utf-8", newline="") as f:
|
||||
return list(csv.DictReader(f))
|
||||
|
||||
|
||||
def _count_jsonl(path: Path, *, max_lines: int = 5_000_000) -> int:
|
||||
n = 0
|
||||
with path.open("r", encoding="utf-8", errors="replace") as f:
|
||||
|
|
@ -36,284 +29,115 @@ def _count_jsonl(path: Path, *, max_lines: int = 5_000_000) -> int:
|
|||
return n
|
||||
|
||||
|
||||
def _pct(x: float) -> str:
|
||||
return f"{x*100:.1f}%"
|
||||
def _pct(num: int, den: int) -> str:
|
||||
return "n/a" if den <= 0 else f"{(num/den)*100:.1f}%"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ReportInputs:
|
||||
summary: Path
|
||||
templates: Path
|
||||
bot_audit: Path
|
||||
objections: Path
|
||||
rescue: Path
|
||||
eras: Path
|
||||
training_pairs: Path
|
||||
fatigue_png: Path
|
||||
editorial_png: Path
|
||||
|
||||
|
||||
def _resolve_inputs(analysis_dir: Path) -> ReportInputs:
|
||||
return ReportInputs(
|
||||
summary=analysis_dir / "summary.json",
|
||||
templates=analysis_dir / "top_outgoing_templates.json",
|
||||
bot_audit=analysis_dir / "bot_performance_audit.csv",
|
||||
objections=analysis_dir / "objection_handlers.json",
|
||||
rescue=analysis_dir / "rescue_playbook.json",
|
||||
eras=analysis_dir / "sergio_eras.csv",
|
||||
training_pairs=analysis_dir / "training_pairs.jsonl",
|
||||
fatigue_png=analysis_dir / "bot_fatigue_chart.png",
|
||||
editorial_png=analysis_dir / "editorial_timeline.png",
|
||||
)
|
||||
|
||||
|
||||
def generate_report(*, analysis_dir: Path, out_path: Path) -> Path:
|
||||
inp = _resolve_inputs(analysis_dir)
|
||||
for p in inp.__dict__.values():
|
||||
if not Path(p).exists():
|
||||
raise FileNotFoundError(str(p))
|
||||
if not inp.summary.exists():
|
||||
raise FileNotFoundError(str(inp.summary))
|
||||
|
||||
summary = _load_json(inp.summary)
|
||||
templates = _load_json(inp.templates)
|
||||
objections = _load_json(inp.objections)
|
||||
rescues = _load_json(inp.rescue)
|
||||
bot_audit = _read_csv(inp.bot_audit)
|
||||
|
||||
owner = summary.get("owner_name") or "Unknown"
|
||||
conv = summary.get("conversations") or {}
|
||||
conv_total = int(conv.get("total") or 0)
|
||||
bot_only = int(conv.get("bot_only") or 0)
|
||||
human = int(conv.get("human_intervened") or 0)
|
||||
conversions = summary.get("conversions") or {}
|
||||
conv_intent = int(conversions.get("intent") or 0)
|
||||
conv_confirmed = int(conversions.get("confirmed") or 0)
|
||||
template_only = int(conv.get("bot_only") or 0)
|
||||
custom_replies = int(conv.get("human_intervened") or 0)
|
||||
|
||||
bot_only_rate = (bot_only / conv_total) if conv_total else 0.0
|
||||
human_rate = (human / conv_total) if conv_total else 0.0
|
||||
intent_rate = (conv_intent / conv_total) if conv_total else 0.0
|
||||
confirmed_rate = (conv_confirmed / conv_total) if conv_total else 0.0
|
||||
buying = summary.get("conversions") or {}
|
||||
buying_weak = int(buying.get("intent") or 0)
|
||||
buying_strong = int(buying.get("confirmed") or 0)
|
||||
|
||||
manual_style = summary.get("manual_style") or {}
|
||||
median_len = manual_style.get("median_len_chars")
|
||||
p90_len = manual_style.get("p90_len_chars")
|
||||
question_rate = float(manual_style.get("question_rate") or 0.0)
|
||||
exclaim_rate = float(manual_style.get("exclaim_rate") or 0.0)
|
||||
emoji_rate = float(manual_style.get("emoji_rate") or 0.0)
|
||||
lang_guess = manual_style.get("lang_guess") or {}
|
||||
templates_total = None
|
||||
templates_repeat = None
|
||||
if inp.templates.exists():
|
||||
t = _load_json(inp.templates)
|
||||
templates_total = int(t.get("templates_total") or 0)
|
||||
templates_repeat = int(t.get("bot_templates") or 0)
|
||||
|
||||
# Templates: prefer canonical strings (safe-ish) and avoid raw samples.
|
||||
top_templates = templates.get("top_templates") or []
|
||||
top_bot = [t for t in top_templates if isinstance(t, dict) and t.get("label_hint") == "bot"]
|
||||
top_manual = [t for t in top_templates if isinstance(t, dict) and t.get("label_hint") == "manual"]
|
||||
|
||||
# Bot audit: best/worst by reply_rate.
|
||||
def fnum(v: str | None) -> float:
|
||||
rescue_count = None
|
||||
if inp.rescue.exists():
|
||||
try:
|
||||
return float(v or 0)
|
||||
rescue = _load_json(inp.rescue)
|
||||
rescue_count = len(rescue) if isinstance(rescue, list) else 0
|
||||
except Exception:
|
||||
return 0.0
|
||||
rescue_count = None
|
||||
|
||||
bot_audit_sorted = sorted(bot_audit, key=lambda r: fnum(r.get("sent")), reverse=True)
|
||||
top_audit = bot_audit_sorted[:10]
|
||||
best_reply = sorted(bot_audit, key=lambda r: fnum(r.get("reply_rate")), reverse=True)[:10]
|
||||
worst_reply = sorted(bot_audit, key=lambda r: fnum(r.get("reply_rate")))[:10]
|
||||
pairs_count = _count_jsonl(inp.training_pairs, max_lines=2_000_000) if inp.training_pairs.exists() else None
|
||||
|
||||
# Objections: most common replies per category.
|
||||
objection_blocks: list[str] = []
|
||||
if isinstance(objections, dict):
|
||||
for cat in ("price", "time", "trust", "stop"):
|
||||
replies = objections.get(cat) or []
|
||||
if not isinstance(replies, list) or not replies:
|
||||
continue
|
||||
top3 = []
|
||||
for r in replies[:3]:
|
||||
if not isinstance(r, dict):
|
||||
continue
|
||||
top3.append(f"- ({r.get('count')}) {r.get('reply')}")
|
||||
if top3:
|
||||
objection_blocks.append(f"### {cat}\n" + "\n".join(top3))
|
||||
generated_at = summary.get("generated_at") if isinstance(summary.get("generated_at"), str) else None
|
||||
|
||||
rescue_count = len(rescues) if isinstance(rescues, list) else 0
|
||||
pairs_count = _count_jsonl(inp.training_pairs, max_lines=2_000_000)
|
||||
|
||||
# Era summary: simple high-level notes.
|
||||
eras_rows = _read_csv(inp.eras)
|
||||
era_recent = eras_rows[-6:] if len(eras_rows) > 6 else eras_rows
|
||||
era_offer_terms: list[str] = []
|
||||
for row in era_recent:
|
||||
offers = (row.get("top_offers") or "").strip()
|
||||
if offers:
|
||||
era_offer_terms.append(offers)
|
||||
|
||||
# A few derived notes.
|
||||
lang_line = ", ".join(f"{k}={v}" for k, v in lang_guess.items())
|
||||
|
||||
# Summarize bot fatigue trend from image existence only (analysis already made it).
|
||||
report = []
|
||||
report.append("# Socialmediatorr Instagram DM History — Human Readable Report (English)")
|
||||
report: list[str] = []
|
||||
report.append("# Instagram DM History — Short Report (English)")
|
||||
report.append("")
|
||||
report.append(f"- Generated: `{summary.get('generated_at')}`")
|
||||
report.append(f"- Owner name used: `{owner}`")
|
||||
if generated_at:
|
||||
report.append(f"- Generated: `{generated_at}`")
|
||||
report.append("- Inbox: `@socialmediatorr`")
|
||||
report.append("")
|
||||
|
||||
report.append("## 1) What This Dataset Represents")
|
||||
report.append("## What This Is")
|
||||
report.append("")
|
||||
report.append(
|
||||
"This is an all-time audit of Instagram DM conversations for `@socialmediatorr`, focused on extracting repeatable sales + support behavior so an AI agent can reply in Sergio’s style."
|
||||
)
|
||||
report.append(
|
||||
"The analysis treats the account as a hybrid system: frequent repeated templates (likely automation/scripts) plus lower-frequency custom replies (human Sergio)."
|
||||
)
|
||||
report.append("This is a short, plain-English summary of the DM history scan.")
|
||||
report.append("It avoids quoting private messages and it avoids storing personal identities.")
|
||||
report.append("")
|
||||
|
||||
report.append("## 2) High-Level Metrics (All-Time)")
|
||||
report.append("## Key Numbers")
|
||||
report.append("")
|
||||
report.append(f"- Conversations analyzed: **{conv_total:,}**")
|
||||
report.append(f"- Bot-only conversations: **{bot_only:,}** ({_pct(bot_only_rate)})")
|
||||
report.append(f"- Human-intervened conversations: **{human:,}** ({_pct(human_rate)})")
|
||||
report.append(f"- Conversion (intent signals): **{conv_intent:,}** ({_pct(intent_rate)})")
|
||||
report.append(f"- Conversion (confirmed signals): **{conv_confirmed:,}** ({_pct(confirmed_rate)})")
|
||||
report.append(f"- Conversations that stayed template-only: **{template_only:,}** ({_pct(template_only, conv_total)})")
|
||||
report.append(f"- Conversations that included custom replies: **{custom_replies:,}** ({_pct(custom_replies, conv_total)})")
|
||||
report.append(f"- Buying/booking signals (weak): **{buying_weak:,}** ({_pct(buying_weak, conv_total)})")
|
||||
report.append(f"- Buying/booking signals (strong): **{buying_strong:,}** ({_pct(buying_strong, conv_total)})")
|
||||
report.append("")
|
||||
report.append(
|
||||
"Notes on conversion: this uses heuristics (keywords + payment/link mentions). It is directionally useful for ranking scripts, but it is not a ground-truth revenue ledger."
|
||||
)
|
||||
report.append("Buying/booking signals are detected from text patterns (they are not a payment ledger).")
|
||||
report.append("")
|
||||
|
||||
report.append("## 3) Sergio Persona (From Manual/Hybrid Replies)")
|
||||
report.append("## What You Need to Know")
|
||||
report.append("")
|
||||
report.append(f"- Typical reply length: median **{median_len}** chars (p90 **{p90_len}**)")
|
||||
report.append(f"- Questions: **{_pct(question_rate)}** | Exclamations: **{_pct(exclaim_rate)}** | Emoji: **{_pct(emoji_rate)}**")
|
||||
report.append(f"- Language guess (manual replies): {lang_line or 'n/a'}")
|
||||
report.append("The fastest improvements come from standardizing answers to repeated questions and sending them in the right time blocks.")
|
||||
report.append("For the full deep report (CET timing, day-of-week patterns, Top 20 questions, and concrete actions), read:")
|
||||
report.append("")
|
||||
report.append("Practical implication for an agent: short, direct replies; minimal punctuation; bilingual capability; low/no emoji usage.")
|
||||
report.append("- `reports/socialmediatorr/dm_history_report_en_detailed.md`")
|
||||
report.append("")
|
||||
|
||||
report.append("## 4) Bot vs Human Segmentation (What It Means)")
|
||||
report.append("## Useful Inventory (Safe Counts Only)")
|
||||
report.append("")
|
||||
report.append(
|
||||
"- **[BOT]** = outgoing message template repeated frequently (>= configured threshold).\n"
|
||||
"- **[MANUAL]** = outgoing message that is rare/unique (<= configured threshold).\n"
|
||||
"- **[HYBRID]** = messages that look like a bot template but with manual edits (prefix match/similarity)."
|
||||
)
|
||||
report.append("")
|
||||
report.append(
|
||||
"This separation is the foundation for: (1) extracting safe reusable scripts, and (2) extracting human-only replies as training data for a RAG or fine-tune."
|
||||
)
|
||||
if templates_total is not None and templates_repeat is not None:
|
||||
report.append(f"- Total outgoing templates detected: **{templates_total:,}**")
|
||||
report.append(f"- High-frequency repeat templates: **{templates_repeat:,}**")
|
||||
if rescue_count is not None:
|
||||
report.append(f"- “Rescue” events detected: **{rescue_count:,}**")
|
||||
if pairs_count is not None:
|
||||
report.append(f"- Training pairs (user → reply) available: **{pairs_count:,}**")
|
||||
if templates_total is None and rescue_count is None and pairs_count is None:
|
||||
report.append("- (No additional artifacts were found next to `summary.json`.)")
|
||||
report.append("")
|
||||
|
||||
report.append("## 5) Top Detected Script Templates (Canonicalized)")
|
||||
report.append("## What You Do Not Need to Know")
|
||||
report.append("")
|
||||
if top_bot:
|
||||
for i, t in enumerate(top_bot[:10], 1):
|
||||
canon = (t.get("canonical") or "").strip()
|
||||
count = int(t.get("count") or 0)
|
||||
report.append(f"- BOT #{i}: sent **{count}**× — `{canon[:160]}`")
|
||||
else:
|
||||
report.append("- (No high-frequency bot templates detected with current thresholds.)")
|
||||
report.append("")
|
||||
|
||||
report.append("## 6) Human Reply Library (Rare/Manual Examples, Canonicalized)")
|
||||
report.append("")
|
||||
if top_manual:
|
||||
for i, t in enumerate(top_manual[:10], 1):
|
||||
canon = (t.get("canonical") or "").strip()
|
||||
count = int(t.get("count") or 0)
|
||||
report.append(f"- MANUAL-ish #{i}: seen **{count}**× — `{canon[:160]}`")
|
||||
else:
|
||||
report.append("- (No low-frequency manual templates included in the cached top list.)")
|
||||
report.append("")
|
||||
|
||||
report.append("## 7) Bot Template Performance (Reply/Conversion Heuristics)")
|
||||
report.append("")
|
||||
report.append("These come from `bot_performance_audit.csv` and are computed per canonical bot template.")
|
||||
report.append("")
|
||||
if top_audit:
|
||||
report.append("### Most-used bot templates (by volume)")
|
||||
for r in top_audit[:8]:
|
||||
report.append(
|
||||
f"- sent={r.get('sent')} reply_rate={r.get('reply_rate')} intent_rate={r.get('conversion_intent_rate')} confirmed_rate={r.get('conversion_confirmed_rate')} — `{(r.get('canonical_template') or '')[:140]}`"
|
||||
)
|
||||
report.append("")
|
||||
if best_reply:
|
||||
report.append("### Best reply-rate bot templates")
|
||||
for r in best_reply[:8]:
|
||||
report.append(f"- reply_rate={r.get('reply_rate')} sent={r.get('sent')} — `{(r.get('canonical_template') or '')[:140]}`")
|
||||
report.append("")
|
||||
if worst_reply:
|
||||
report.append("### Worst reply-rate bot templates")
|
||||
for r in worst_reply[:8]:
|
||||
report.append(f"- reply_rate={r.get('reply_rate')} sent={r.get('sent')} — `{(r.get('canonical_template') or '')[:140]}`")
|
||||
report.append("")
|
||||
|
||||
report.append("## 8) Objections → Best Sergio Replies (Playbook)")
|
||||
report.append("")
|
||||
if objection_blocks:
|
||||
report.extend(objection_blocks)
|
||||
else:
|
||||
report.append("- No objection handlers detected with current keyword rules.")
|
||||
report.append("")
|
||||
|
||||
report.append("## 9) Rescue / Save Logic (Human Intervention After Silence/Negativity)")
|
||||
report.append("")
|
||||
report.append(f"- Rescue events detected (heuristic): **{rescue_count:,}**")
|
||||
report.append(
|
||||
"A “rescue” is when a manual/hybrid owner message follows either (a) a user negative signal, or (b) >24h silence after a bot message, and the thread later shows a confirmed conversion signal."
|
||||
)
|
||||
report.append("")
|
||||
|
||||
report.append("## 10) Product / Offer Evolution (Eras)")
|
||||
report.append("")
|
||||
report.append(
|
||||
"This is inferred from mentions of pricing/currency + offer terms (e.g., call/audit/coaching) and summarized quarterly."
|
||||
)
|
||||
report.append("")
|
||||
if era_offer_terms:
|
||||
report.append("Recent quarters (top extracted offer signals):")
|
||||
for line in era_offer_terms:
|
||||
report.append(f"- {line}")
|
||||
else:
|
||||
report.append("- No offer signals detected in the most recent quarters with current extraction rules.")
|
||||
report.append("")
|
||||
|
||||
report.append("## 11) Charts")
|
||||
report.append("")
|
||||
report.append(f"- Bot fatigue (weekly reply rate to the dominant bot script): `{inp.fatigue_png}`")
|
||||
report.append(f"- Editorial timeline (top bot scripts vs conversions): `{inp.editorial_png}`")
|
||||
report.append("")
|
||||
|
||||
report.append("## 12) What To Build From This (Agent Requirements)")
|
||||
report.append("")
|
||||
report.append("### Core behavior")
|
||||
report.append("- Start with top bot templates for predictable openers and FAQ-style flows.")
|
||||
report.append("- Switch to Sergio-style manual patterns on objections, negotiation, or when conversation stalls.")
|
||||
report.append("- Use a rescue cadence (time-based triggers) after silence.")
|
||||
report.append("")
|
||||
report.append("### Data products to drive the agent")
|
||||
report.append(f"- Training pairs (manual-only, converted threads): `{inp.training_pairs}` (rows: ~{pairs_count:,})")
|
||||
report.append(f"- Objection handlers: `{inp.objections}`")
|
||||
report.append(f"- Rescue playbook: `{inp.rescue}`")
|
||||
report.append(f"- Script templates + editorial drift: `{inp.templates}`")
|
||||
report.append("")
|
||||
report.append("### Safety boundaries (recommended)")
|
||||
report.append("- Never request or store passwords/2FA codes.")
|
||||
report.append("- Avoid medical/legal/financial advice; redirect to a call or a human.")
|
||||
report.append("- If user asks to move off-platform, follow Sergio’s historical policy and business rules.")
|
||||
report.append("")
|
||||
|
||||
report.append("## 13) What We Do NOT Need To Know (Ignore / Do Not Store)")
|
||||
report.append("")
|
||||
report.append("- Exact client identities (names, handles, phone numbers, emails) unless required for operational routing.")
|
||||
report.append("- Media attachments (photos/videos/audio) for persona cloning; they add storage cost and privacy risk.")
|
||||
report.append("- Full verbatim message dumps for every thread; for RAG you only need high-quality pairs and playbook snippets.")
|
||||
report.append("- Individual one-off edge cases that never repeat (unless they represent a safety boundary).")
|
||||
report.append("- Internal Meta export folder structure details beyond `messages/inbox/**/message*.json`.")
|
||||
report.append("")
|
||||
|
||||
report.append("## 14) Caveats / Gaps")
|
||||
report.append("")
|
||||
report.append("- The export does not reliably label ManyChat vs Human; bot/human is inferred by repetition and similarity.")
|
||||
report.append("- Conversion is heuristic; integrate Stripe/Calendly/CRM events if you want ground-truth attribution.")
|
||||
report.append("- Language detection is heuristic; improve it if you need precise bilingual routing.")
|
||||
report.append("Do not store or copy these into an automation system unless you have a clear operational reason:")
|
||||
report.append("- Names, handles, phone numbers, emails.")
|
||||
report.append("- Full conversation transcripts for every thread.")
|
||||
report.append("- Photos, videos, audio, and other attachments.")
|
||||
report.append("- One-off edge cases that never repeat.")
|
||||
report.append("")
|
||||
|
||||
out_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
|
@ -323,19 +147,20 @@ def generate_report(*, analysis_dir: Path, out_path: Path) -> Path:
|
|||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
|
||||
ap = argparse.ArgumentParser(description="Generate a human-readable English report from analyze_instagram_export outputs.")
|
||||
ap.add_argument("--analysis-dir", required=True, help="directory produced by analyze_instagram_export (contains summary.json)")
|
||||
ap.add_argument("--out", default=None, help="output markdown path (default: <analysis-dir>/dm_history_report_en.md)")
|
||||
ap = argparse.ArgumentParser(description="Generate a short, safe DM history report from an analysis directory.")
|
||||
ap.add_argument("--analysis-dir", required=True, help="analyze_instagram_export output directory")
|
||||
ap.add_argument("--out", default=None, help="output markdown path (default: dm_history_report_en.md in CWD)")
|
||||
args = ap.parse_args(argv)
|
||||
|
||||
analysis_dir = Path(args.analysis_dir)
|
||||
out_path = Path(args.out) if args.out else (analysis_dir / "dm_history_report_en.md")
|
||||
out_path = Path(args.out) if args.out else (Path.cwd() / "dm_history_report_en.md")
|
||||
|
||||
try:
|
||||
p = generate_report(analysis_dir=analysis_dir, out_path=out_path)
|
||||
print(json.dumps({"ok": True, "out": str(p)}, ensure_ascii=False))
|
||||
return 0
|
||||
except FileNotFoundError as e:
|
||||
print(f"Missing required input: {e}", file=os.sys.stderr)
|
||||
print(f"Missing analysis input: {e}", file=os.sys.stderr)
|
||||
return 2
|
||||
except Exception as e:
|
||||
print(f"Report generation failed: {e}", file=os.sys.stderr)
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ from typing import Any, Iterable, Literal
|
|||
|
||||
from .analyze_instagram_export import canonicalize_text
|
||||
|
||||
DEFAULT_LOCAL_TZ_NAME = "Europe/Brussels"
|
||||
DEFAULT_LOCAL_TZ_NAME = "Europe/Paris"
|
||||
|
||||
|
||||
def _safe_chmod_600(path: Path) -> None:
|
||||
|
|
@ -418,11 +418,11 @@ def _question_theme(text: str) -> str | None:
|
|||
toks = s_compact.split()
|
||||
if len(toks) == 1:
|
||||
w = toks[0]
|
||||
if w in {"book", "ebook", "libro", "pdf"}:
|
||||
if w in {"book", "ebook", "libro", "pdf", "livre", "llibre"}:
|
||||
return "Just one word: book"
|
||||
if w in {"link", "enlace"}:
|
||||
if w in {"link", "enlace", "lien", "enllac", "enllaç"}:
|
||||
return "Just one word: link"
|
||||
if w in {"price", "precio", "cost"}:
|
||||
if w in {"price", "precio", "cost", "prix", "preu"}:
|
||||
return "Just one word: price"
|
||||
|
||||
# "I tried, but it didn't arrive / it doesn't work"
|
||||
|
|
@ -499,15 +499,48 @@ def _question_theme(text: str) -> str | None:
|
|||
return "Where are you based?"
|
||||
|
||||
# Price / cost
|
||||
if any(k in s for k in ("price", "cost", "how much", "$", "€", "usd", "eur", "precio", "cuanto", "cuánto", "caro")):
|
||||
if any(
|
||||
k in s
|
||||
for k in (
|
||||
"price",
|
||||
"cost",
|
||||
"how much",
|
||||
"$",
|
||||
"€",
|
||||
"usd",
|
||||
"eur",
|
||||
"precio",
|
||||
"cuanto",
|
||||
"cuánto",
|
||||
"caro",
|
||||
"prix",
|
||||
"preu",
|
||||
)
|
||||
):
|
||||
return "What does it cost?"
|
||||
|
||||
# Link / payment link
|
||||
if any(k in s for k in ("link", "send the link", "send me the link", "where is the link", "enlace", "stripe", "paypal", "checkout", "invoice")):
|
||||
if any(
|
||||
k in s
|
||||
for k in (
|
||||
"link",
|
||||
"send the link",
|
||||
"send me the link",
|
||||
"where is the link",
|
||||
"enlace",
|
||||
"lien",
|
||||
"enllaç",
|
||||
"enllac",
|
||||
"stripe",
|
||||
"paypal",
|
||||
"checkout",
|
||||
"invoice",
|
||||
)
|
||||
):
|
||||
return "Can you send the link?"
|
||||
|
||||
# Book / ebook / pdf
|
||||
if any(k in s for k in ("book", "ebook", "e-book", "pdf", "libro")):
|
||||
if any(k in s for k in ("book", "ebook", "e-book", "pdf", "libro", "livre", "llibre")):
|
||||
return "Where do I get the book?"
|
||||
|
||||
# Call / schedule
|
||||
|
|
@ -533,7 +566,7 @@ def _question_theme(text: str) -> str | None:
|
|||
return "How do I book a call?"
|
||||
|
||||
# Video
|
||||
if any(k in s for k in ("video", "vídeo", "youtube")):
|
||||
if any(k in s for k in ("video", "vídeo", "vidéo", "youtube")):
|
||||
return "Can you send the video?"
|
||||
|
||||
# Steps / what next
|
||||
|
|
@ -541,11 +574,45 @@ def _question_theme(text: str) -> str | None:
|
|||
return "What are the steps?"
|
||||
|
||||
# How it works / details
|
||||
if any(k in s for k in ("how does", "how it works", "how does it work", "how does this work", "como funciona", "cómo funciona", "more info", "details", "explain")):
|
||||
if any(
|
||||
k in s
|
||||
for k in (
|
||||
"how does",
|
||||
"how it works",
|
||||
"how does it work",
|
||||
"how does this work",
|
||||
"como funciona",
|
||||
"cómo funciona",
|
||||
"more info",
|
||||
"details",
|
||||
"explain",
|
||||
"comment ça marche",
|
||||
"ça marche",
|
||||
"com funciona",
|
||||
)
|
||||
):
|
||||
return "How does it work?"
|
||||
|
||||
# What you do / what is this
|
||||
if any(k in s for k in ("what is this", "what do you do", "what is it", "what do you offer", "service", "services", "que es", "qué es", "que haces", "qué haces", "de que va", "de qué va")):
|
||||
if any(
|
||||
k in s
|
||||
for k in (
|
||||
"what is this",
|
||||
"what do you do",
|
||||
"what is it",
|
||||
"what do you offer",
|
||||
"service",
|
||||
"services",
|
||||
"que es",
|
||||
"qué es",
|
||||
"que haces",
|
||||
"qué haces",
|
||||
"de que va",
|
||||
"de qué va",
|
||||
"c'est quoi",
|
||||
"cest quoi",
|
||||
)
|
||||
):
|
||||
return "What is this?"
|
||||
|
||||
# Trust / legitimacy
|
||||
|
|
@ -597,6 +664,8 @@ def _offer_terms(text: str) -> set[str]:
|
|||
("ebook", "Ebook"),
|
||||
("e-book", "Ebook"),
|
||||
("libro", "Book"),
|
||||
("livre", "Book"),
|
||||
("llibre", "Book"),
|
||||
("pdf", "PDF"),
|
||||
("call", "Call"),
|
||||
("llamada", "Call"),
|
||||
|
|
@ -935,20 +1004,17 @@ def generate_report(
|
|||
now = datetime.now(timezone.utc).date().isoformat()
|
||||
report: list[str] = []
|
||||
|
||||
report.append("# Socialmediatorr Instagram DM History : Plain-English Deep Report")
|
||||
report.append("# Instagram DM History — Plain-English Deep Report")
|
||||
report.append("")
|
||||
report.append("## DM History Deep Report")
|
||||
report.append("## What This Is")
|
||||
report.append("")
|
||||
report.append(f"**Subject:** Instagram direct messages for `@socialmediatorr`")
|
||||
report.append("**Version:** v1.0 (STYLE BIBLE EN 3.0GM)")
|
||||
report.append(f"**Inbox:** `@socialmediatorr`")
|
||||
report.append(f"**Date:** {now}")
|
||||
report.append("**Status:** REVIEW REQUIRED")
|
||||
report.append("**Citation:** `if://report/socialmediatorr/instagram/dm-history/`")
|
||||
report.append("**Author:** Danny Stocker | InfraFabric Research")
|
||||
report.append("**Time zone used:** CET")
|
||||
report.append("")
|
||||
report.append("### How This Report Was Made")
|
||||
report.append("")
|
||||
report.append("> This is an automated count of patterns. It is not a therapy note and it is not a sales ledger.")
|
||||
report.append("> This is a count of patterns. It is not a therapy note and it is not a sales ledger.")
|
||||
report.append("")
|
||||
report.append(
|
||||
"This document was generated by reading an Instagram data export and counting repeat patterns over time. "
|
||||
|
|
@ -957,9 +1023,9 @@ def generate_report(
|
|||
report.append("")
|
||||
report.append("---")
|
||||
report.append("")
|
||||
report.append(f"**Context:** This inbox contains a high-volume message-and-reply system over {window_days} days.")
|
||||
report.append(f"**Context:** This inbox contains message history over {window_days} days.")
|
||||
report.append("")
|
||||
report.append("> Your messaging system is working as a volume engine. The weak point is consistency at the moments where people ask to buy or book.")
|
||||
report.append("> The system works at scale. The weak point is the “next step” moments: when people ask what to do, what it costs, or where to get it.")
|
||||
report.append("")
|
||||
report.append(
|
||||
"The purpose of this report is practical: define what to keep, what to remove, and what to automate safely—without damaging trust."
|
||||
|
|
|
|||
19
tools/README.md
Normal file
19
tools/README.md
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
# Tools
|
||||
|
||||
## Mermaid checks (Markdown diagrams)
|
||||
|
||||
This repo uses Mermaid diagrams in Markdown reports.
|
||||
|
||||
Local lint (partial):
|
||||
|
||||
- `npm install`
|
||||
- `npm run verify:mermaid`
|
||||
|
||||
Notes:
|
||||
- This check validates diagram types supported by `@mermaid-js/parser` (for example: `pie`).
|
||||
- Some diagram types (for example: `flowchart`) are not supported by that parser yet and will be reported as `skipped`.
|
||||
|
||||
Full validation (recommended):
|
||||
|
||||
- Use Forgejo’s built-in PDF export for the report file. If the PDF export succeeds, the diagrams compiled successfully.
|
||||
|
||||
109
tools/verify_mermaid.mjs
Normal file
109
tools/verify_mermaid.mjs
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
import process from "node:process";
|
||||
|
||||
import { parse } from "@mermaid-js/parser";
|
||||
|
||||
/**
 * Tell whether a path names a Markdown document, judged by extension only.
 *
 * @param {string} filePath - Path or bare file name to inspect.
 * @returns {boolean} True when the name ends in `.md` or `.markdown`, in any letter case.
 */
function isMarkdownFile(filePath) {
  const normalized = filePath.toLowerCase();
  return [".md", ".markdown"].some((extension) => normalized.endsWith(extension));
}
|
||||
|
||||
/**
 * Recursively yield the path of every regular file at or below `rootPath`.
 *
 * When `rootPath` is itself a file, it is yielded as the only result.
 * Directory entries are visited in ascending name order (plain code-unit
 * comparison) so the traversal, and therefore any report built from it,
 * is reproducible regardless of the order the filesystem returns entries in.
 * Entries that are neither directories nor regular files are ignored.
 *
 * @param {string} rootPath - File or directory to start from.
 * @yields {string} Path of each file found, built with `path.join`.
 * @throws {Error} Whatever `fs.statSync` / `fs.readdirSync` throw, e.g. when
 *   `rootPath` does not exist.
 */
function* walkFiles(rootPath) {
  if (fs.statSync(rootPath).isFile()) {
    yield rootPath;
    return;
  }

  // Sort by name: readdir order is not guaranteed to be stable across platforms.
  const entries = fs
    .readdirSync(rootPath, { withFileTypes: true })
    .sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

  for (const entry of entries) {
    const fullPath = path.join(rootPath, entry.name);
    if (entry.isDirectory()) {
      yield* walkFiles(fullPath);
    } else if (entry.isFile()) {
      yield fullPath;
    }
  }
}
|
||||
|
||||
/**
 * Collect the body of every fenced ```mermaid code block in a Markdown text.
 *
 * A block is recognized only when:
 * - the opening fence starts a line (optionally indented with spaces/tabs),
 * - the info string is exactly `mermaid`, optionally followed by whitespace
 *   and further text (so `mermaidish` is not treated as a Mermaid block), and
 * - the closing fence also starts a line, so triple backticks appearing in the
 *   middle of a diagram line do not end the block early.
 *
 * Both LF and CRLF line endings are accepted. Bodies are returned untrimmed.
 *
 * @param {string} markdownText - Full Markdown document text.
 * @returns {string[]} Block bodies in document order (possibly empty strings).
 */
function extractMermaidBlocks(markdownText) {
  // A fresh regex per call avoids sharing `lastIndex` state across calls.
  const fencePattern = /^[ \t]*```mermaid(?:[ \t][^\n]*)?\r?\n([\s\S]*?)^[ \t]*```/gm;
  const text = String(markdownText ?? "");
  return Array.from(text.matchAll(fencePattern), (match) => match[1] ?? "");
}
|
||||
|
||||
/**
 * Identify which parser grammar to use for a Mermaid diagram.
 *
 * The decision is made from the first meaningful line of `code`: blank lines
 * and lines starting with `%%` are ignored, and the remaining first line is
 * matched by prefix against the known diagram keywords.
 *
 * @param {string} code - Raw diagram source; falsy values are treated as empty.
 * @returns {string|null} The matching diagram keyword, or null when the
 *   diagram is empty or its type is not in the list below.
 */
function detectDiagramType(code) {
  // Checked in this order; the first prefix that matches wins.
  const knownPrefixes = ["pie", "gitGraph", "architecture", "packet", "info", "radar", "treemap"];

  let firstMeaningfulLine = null;
  for (const rawLine of String(code || "").split(/\r\n?|\n/)) {
    const line = rawLine.trim();
    if (line === "" || line.startsWith("%%")) continue;
    firstMeaningfulLine = line;
    break;
  }

  if (firstMeaningfulLine === null) return null;

  // Not supported by @mermaid-js/parser yet (example: flowchart/sequence/class) -> null.
  return knownPrefixes.find((prefix) => firstMeaningfulLine.startsWith(prefix)) ?? null;
}
|
||||
|
||||
/**
 * Validate Mermaid diagrams embedded in Markdown files.
 *
 * Roots come from the command-line arguments, defaulting to `reports`.
 * Every Markdown file under each root is scanned for ```mermaid blocks; blocks
 * whose diagram type is recognized are parsed with `@mermaid-js/parser`, the
 * rest are counted as skipped.
 *
 * A root that does not exist is reported on stderr and counted as a failure
 * instead of aborting the whole run with an unhandled filesystem error, so
 * the remaining roots are still checked.
 *
 * Output: on success a JSON summary `{ ok, diagrams, skipped }` on stdout; on
 * failure one `[mermaid] ...` line per problem plus a JSON summary
 * `{ ok, diagrams, failures, skipped }` on stderr.
 *
 * @returns {Promise<number>} Process exit code: 0 when everything passed, 1 otherwise.
 */
async function main() {
  const cliRoots = process.argv.slice(2);
  const roots = cliRoots.length ? cliRoots : ["reports"];

  let ok = true;
  let total = 0;
  let failures = 0;
  let skipped = 0;

  for (const root of roots) {
    // Report a missing root cleanly; walkFiles would otherwise throw ENOENT.
    if (!fs.existsSync(root)) {
      ok = false;
      failures += 1;
      console.error(`[mermaid] ${root}: path does not exist`);
      continue;
    }

    for (const filePath of walkFiles(root)) {
      if (!isMarkdownFile(filePath)) continue;

      const text = fs.readFileSync(filePath, "utf8");
      const blocks = extractMermaidBlocks(text);
      if (!blocks.length) continue;

      for (let i = 0; i < blocks.length; i++) {
        const code = String(blocks[i] || "").trim();
        total += 1;

        const diagramType = detectDiagramType(code);
        if (!diagramType) {
          // Diagram type not handled by the parser package: count it, don't fail.
          skipped += 1;
          continue;
        }

        try {
          await parse(diagramType, code);
        } catch (err) {
          ok = false;
          failures += 1;
          const msg =
            err && typeof err === "object" && "message" in err ? String(err.message) : String(err);
          console.error(`[mermaid] ${filePath} block=${i + 1} type=${diagramType}: ${msg}`);
        }
      }
    }
  }

  if (ok) {
    console.log(JSON.stringify({ ok: true, diagrams: total, skipped }, null, 2));
    return 0;
  }

  console.error(JSON.stringify({ ok: false, diagrams: total, failures, skipped }, null, 2));
  return 1;
}
|
||||
|
||||
process.exitCode = await main();
|
||||
Loading…
Add table
Reference in a new issue